sqlglot.parser
1from __future__ import annotations 2 3import itertools 4import logging 5import re 6import typing as t 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors 11from sqlglot.helper import apply_index_offset, ensure_list, seq_get 12from sqlglot.time import format_time 13from sqlglot.tokens import Token, Tokenizer, TokenType 14from sqlglot.trie import TrieResult, in_trie, new_trie 15 16if t.TYPE_CHECKING: 17 from sqlglot._typing import E, Lit 18 from sqlglot.dialects.dialect import Dialect, DialectType 19 20 T = t.TypeVar("T") 21 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 22 23logger = logging.getLogger("sqlglot") 24 25OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 26 27# Used to detect alphabetical characters and +/- in timestamp literals 28TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 29 30 31def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 32 if len(args) == 1 and args[0].is_star: 33 return exp.StarMap(this=args[0]) 34 35 keys = [] 36 values = [] 37 for i in range(0, len(args), 2): 38 keys.append(args[i]) 39 values.append(args[i + 1]) 40 41 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 42 43 44def build_like(args: t.List) -> exp.Escape | exp.Like: 45 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 46 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 47 48 49def binary_range_parser( 50 expr_type: t.Type[exp.Expression], reverse_args: bool = False 51) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 52 def _parse_binary_range( 53 self: Parser, this: t.Optional[exp.Expression] 54 ) -> t.Optional[exp.Expression]: 55 expression = self._parse_bitwise() 56 if reverse_args: 57 this, expression = expression, this 58 return self._parse_escape(self.expression(expr_type, 
this=this, expression=expression)) 59 60 return _parse_binary_range 61 62 63def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 64 # Default argument order is base, expression 65 this = seq_get(args, 0) 66 expression = seq_get(args, 1) 67 68 if expression: 69 if not dialect.LOG_BASE_FIRST: 70 this, expression = expression, this 71 return exp.Log(this=this, expression=expression) 72 73 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 74 75 76def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 79 80 81def build_lower(args: t.List) -> exp.Lower | exp.Hex: 82 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 85 86 87def build_upper(args: t.List) -> exp.Upper | exp.Hex: 88 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 89 arg = seq_get(args, 0) 90 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 91 92 93def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 94 def _builder(args: t.List, dialect: Dialect) -> E: 95 expression = expr_type( 96 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 97 ) 98 if len(args) > 2 and expr_type is exp.JSONExtract: 99 expression.set("expressions", args[2:]) 100 101 return expression 102 103 return _builder 104 105 106def build_mod(args: t.List) -> exp.Mod: 107 this = seq_get(args, 0) 108 expression = seq_get(args, 1) 109 110 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 111 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 112 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 113 114 return exp.Mod(this=this, expression=expression) 115 116 117def build_pad(args: t.List, is_left: bool = True): 118 return exp.Pad( 119 this=seq_get(args, 0), 120 expression=seq_get(args, 1), 121 fill_pattern=seq_get(args, 2), 122 is_left=is_left, 123 ) 124 125 126def build_array_constructor( 127 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 128) -> exp.Expression: 129 array_exp = exp_class(expressions=args) 130 131 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 132 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 133 134 return array_exp 135 136 137def build_convert_timezone( 138 args: t.List, default_source_tz: t.Optional[str] = None 139) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 140 if len(args) == 2: 141 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 142 return exp.ConvertTimezone( 143 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 144 ) 145 146 return exp.ConvertTimezone.from_arg_list(args) 147 148 149def build_trim(args: t.List, is_left: bool = True): 150 return exp.Trim( 151 this=seq_get(args, 0), 152 expression=seq_get(args, 1), 153 position="LEADING" if is_left else "TRAILING", 154 ) 155 156 157def build_coalesce( 158 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 159) -> exp.Coalesce: 160 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 161 162 163def build_locate_strposition(args: t.List): 164 return exp.StrPosition( 165 this=seq_get(args, 1), 166 substr=seq_get(args, 0), 167 position=seq_get(args, 2), 168 ) 169 170 171class _Parser(type): 172 def __new__(cls, clsname, bases, attrs): 173 klass = super().__new__(cls, clsname, 
bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 
to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 
TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 
TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 TokenType.SESSION, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 
TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *ALTERABLES, 573 *CREATABLES, 574 *SUBQUERY_PREDICATES, 575 *TYPE_TOKENS, 576 *NO_PAREN_FUNCTIONS, 577 } 578 ID_VAR_TOKENS.remove(TokenType.UNION) 579 580 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 581 TokenType.ANTI, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 
TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.UTC_DATE, 647 TokenType.UTC_TIME, 648 TokenType.UTC_TIMESTAMP, 649 TokenType.WINDOW, 650 TokenType.XOR, 651 *TYPE_TOKENS, 652 *SUBQUERY_PREDICATES, 653 } 654 655 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.AND: exp.And, 657 } 658 659 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.COLON_EQ: exp.PropertyEQ, 661 } 662 663 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 664 TokenType.OR: exp.Or, 665 } 666 667 EQUALITY = { 668 TokenType.EQ: exp.EQ, 669 TokenType.NEQ: exp.NEQ, 670 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 671 } 672 673 COMPARISON = { 674 TokenType.GT: exp.GT, 675 TokenType.GTE: exp.GTE, 676 TokenType.LT: exp.LT, 677 TokenType.LTE: exp.LTE, 678 } 679 680 BITWISE = { 681 TokenType.AMP: exp.BitwiseAnd, 682 TokenType.CARET: exp.BitwiseXor, 683 TokenType.PIPE: exp.BitwiseOr, 684 } 685 686 TERM = { 687 TokenType.DASH: exp.Sub, 688 TokenType.PLUS: exp.Add, 689 TokenType.MOD: exp.Mod, 690 TokenType.COLLATE: exp.Collate, 691 } 692 693 FACTOR = { 694 TokenType.DIV: exp.IntDiv, 695 TokenType.LR_ARROW: exp.Distance, 696 TokenType.SLASH: exp.Div, 697 TokenType.STAR: exp.Mul, 698 } 699 700 EXPONENT: t.Dict[TokenType, 
t.Type[exp.Expression]] = {} 701 702 TIMES = { 703 TokenType.TIME, 704 TokenType.TIMETZ, 705 } 706 707 TIMESTAMPS = { 708 TokenType.TIMESTAMP, 709 TokenType.TIMESTAMPNTZ, 710 TokenType.TIMESTAMPTZ, 711 TokenType.TIMESTAMPLTZ, 712 *TIMES, 713 } 714 715 SET_OPERATIONS = { 716 TokenType.UNION, 717 TokenType.INTERSECT, 718 TokenType.EXCEPT, 719 } 720 721 JOIN_METHODS = { 722 TokenType.ASOF, 723 TokenType.NATURAL, 724 TokenType.POSITIONAL, 725 } 726 727 JOIN_SIDES = { 728 TokenType.LEFT, 729 TokenType.RIGHT, 730 TokenType.FULL, 731 } 732 733 JOIN_KINDS = { 734 TokenType.ANTI, 735 TokenType.CROSS, 736 TokenType.INNER, 737 TokenType.OUTER, 738 TokenType.SEMI, 739 TokenType.STRAIGHT_JOIN, 740 } 741 742 JOIN_HINTS: t.Set[str] = set() 743 744 LAMBDAS = { 745 TokenType.ARROW: lambda self, expressions: self.expression( 746 exp.Lambda, 747 this=self._replace_lambda( 748 self._parse_assignment(), 749 expressions, 750 ), 751 expressions=expressions, 752 ), 753 TokenType.FARROW: lambda self, expressions: self.expression( 754 exp.Kwarg, 755 this=exp.var(expressions[0].name), 756 expression=self._parse_assignment(), 757 ), 758 } 759 760 COLUMN_OPERATORS = { 761 TokenType.DOT: None, 762 TokenType.DOTCOLON: lambda self, this, to: self.expression( 763 exp.JSONCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.DCOLON: lambda self, this, to: self.build_cast( 768 strict=self.STRICT_CAST, this=this, to=to 769 ), 770 TokenType.ARROW: lambda self, this, path: self.expression( 771 exp.JSONExtract, 772 this=this, 773 expression=self.dialect.to_json_path(path), 774 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 775 ), 776 TokenType.DARROW: lambda self, this, path: self.expression( 777 exp.JSONExtractScalar, 778 this=this, 779 expression=self.dialect.to_json_path(path), 780 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 781 ), 782 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 783 exp.JSONBExtract, 784 this=this, 785 expression=path, 786 ), 787 TokenType.DHASH_ARROW: 
lambda self, this, path: self.expression( 788 exp.JSONBExtractScalar, 789 this=this, 790 expression=path, 791 ), 792 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 793 exp.JSONBContains, 794 this=this, 795 expression=key, 796 ), 797 } 798 799 CAST_COLUMN_OPERATORS = { 800 TokenType.DOTCOLON, 801 TokenType.DCOLON, 802 } 803 804 EXPRESSION_PARSERS = { 805 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 806 exp.Column: lambda self: self._parse_column(), 807 exp.Condition: lambda self: self._parse_assignment(), 808 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 809 exp.Expression: lambda self: self._parse_expression(), 810 exp.From: lambda self: self._parse_from(joins=True), 811 exp.Group: lambda self: self._parse_group(), 812 exp.Having: lambda self: self._parse_having(), 813 exp.Hint: lambda self: self._parse_hint_body(), 814 exp.Identifier: lambda self: self._parse_id_var(), 815 exp.Join: lambda self: self._parse_join(), 816 exp.Lambda: lambda self: self._parse_lambda(), 817 exp.Lateral: lambda self: self._parse_lateral(), 818 exp.Limit: lambda self: self._parse_limit(), 819 exp.Offset: lambda self: self._parse_offset(), 820 exp.Order: lambda self: self._parse_order(), 821 exp.Ordered: lambda self: self._parse_ordered(), 822 exp.Properties: lambda self: self._parse_properties(), 823 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 824 exp.Qualify: lambda self: self._parse_qualify(), 825 exp.Returning: lambda self: self._parse_returning(), 826 exp.Select: lambda self: self._parse_select(), 827 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 828 exp.Table: lambda self: self._parse_table_parts(), 829 exp.TableAlias: lambda self: self._parse_table_alias(), 830 exp.Tuple: lambda self: self._parse_value(values=False), 831 exp.Whens: lambda self: self._parse_when_matched(), 832 exp.Where: lambda self: self._parse_where(), 833 exp.Window: lambda 
self: self._parse_named_window(), 834 exp.With: lambda self: self._parse_with(), 835 "JOIN_TYPE": lambda self: self._parse_join_parts(), 836 } 837 838 STATEMENT_PARSERS = { 839 TokenType.ALTER: lambda self: self._parse_alter(), 840 TokenType.ANALYZE: lambda self: self._parse_analyze(), 841 TokenType.BEGIN: lambda self: self._parse_transaction(), 842 TokenType.CACHE: lambda self: self._parse_cache(), 843 TokenType.COMMENT: lambda self: self._parse_comment(), 844 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 845 TokenType.COPY: lambda self: self._parse_copy(), 846 TokenType.CREATE: lambda self: self._parse_create(), 847 TokenType.DELETE: lambda self: self._parse_delete(), 848 TokenType.DESC: lambda self: self._parse_describe(), 849 TokenType.DESCRIBE: lambda self: self._parse_describe(), 850 TokenType.DROP: lambda self: self._parse_drop(), 851 TokenType.GRANT: lambda self: self._parse_grant(), 852 TokenType.REVOKE: lambda self: self._parse_revoke(), 853 TokenType.INSERT: lambda self: self._parse_insert(), 854 TokenType.KILL: lambda self: self._parse_kill(), 855 TokenType.LOAD: lambda self: self._parse_load(), 856 TokenType.MERGE: lambda self: self._parse_merge(), 857 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 858 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 859 TokenType.REFRESH: lambda self: self._parse_refresh(), 860 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 861 TokenType.SET: lambda self: self._parse_set(), 862 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 863 TokenType.UNCACHE: lambda self: self._parse_uncache(), 864 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 865 TokenType.UPDATE: lambda self: self._parse_update(), 866 TokenType.USE: lambda self: self._parse_use(), 867 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 868 } 869 870 UNARY_PARSERS = { 871 TokenType.PLUS: lambda self: self._parse_unary(), 
# Unary + is handled as a no-op 872 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 873 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 874 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 875 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 876 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 877 } 878 879 STRING_PARSERS = { 880 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 881 exp.RawString, this=token.text 882 ), 883 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 884 exp.National, this=token.text 885 ), 886 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 887 TokenType.STRING: lambda self, token: self.expression( 888 exp.Literal, this=token.text, is_string=True 889 ), 890 TokenType.UNICODE_STRING: lambda self, token: self.expression( 891 exp.UnicodeString, 892 this=token.text, 893 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 894 ), 895 } 896 897 NUMERIC_PARSERS = { 898 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 899 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 900 TokenType.HEX_STRING: lambda self, token: self.expression( 901 exp.HexString, 902 this=token.text, 903 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 904 ), 905 TokenType.NUMBER: lambda self, token: self.expression( 906 exp.Literal, this=token.text, is_string=False 907 ), 908 } 909 910 PRIMARY_PARSERS = { 911 **STRING_PARSERS, 912 **NUMERIC_PARSERS, 913 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 914 TokenType.NULL: lambda self, _: self.expression(exp.Null), 915 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 916 TokenType.FALSE: lambda self, _: 
self.expression(exp.Boolean, this=False), 917 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 918 TokenType.STAR: lambda self, _: self._parse_star_ops(), 919 } 920 921 PLACEHOLDER_PARSERS = { 922 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 923 TokenType.PARAMETER: lambda self: self._parse_parameter(), 924 TokenType.COLON: lambda self: ( 925 self.expression(exp.Placeholder, this=self._prev.text) 926 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 927 else None 928 ), 929 } 930 931 RANGE_PARSERS = { 932 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 933 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 934 TokenType.GLOB: binary_range_parser(exp.Glob), 935 TokenType.ILIKE: binary_range_parser(exp.ILike), 936 TokenType.IN: lambda self, this: self._parse_in(this), 937 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 938 TokenType.IS: lambda self, this: self._parse_is(this), 939 TokenType.LIKE: binary_range_parser(exp.Like), 940 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 941 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 942 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 943 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 944 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 945 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 946 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 947 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 948 } 949 950 PIPE_SYNTAX_TRANSFORM_PARSERS = { 951 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 952 "AS": lambda self, query: self._build_pipe_cte( 953 query, [exp.Star()], self._parse_table_alias() 954 ), 955 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 956 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 957 "ORDER BY": lambda self, query: 
query.order_by( 958 self._parse_order(), append=False, copy=False 959 ), 960 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 961 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 962 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 963 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 964 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 965 } 966 967 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 968 "ALLOWED_VALUES": lambda self: self.expression( 969 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 970 ), 971 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 972 "AUTO": lambda self: self._parse_auto_property(), 973 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 974 "BACKUP": lambda self: self.expression( 975 exp.BackupProperty, this=self._parse_var(any_token=True) 976 ), 977 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 978 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 979 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 980 "CHECKSUM": lambda self: self._parse_checksum(), 981 "CLUSTER BY": lambda self: self._parse_cluster(), 982 "CLUSTERED": lambda self: self._parse_clustered_by(), 983 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 984 exp.CollateProperty, **kwargs 985 ), 986 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 987 "CONTAINS": lambda self: self._parse_contains_property(), 988 "COPY": lambda self: self._parse_copy_property(), 989 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 990 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 991 "DEFINER": lambda self: self._parse_definer(), 992 "DETERMINISTIC": lambda self: self.expression( 993 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 994 ), 995 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 996 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 997 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 998 "DISTKEY": lambda self: self._parse_distkey(), 999 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1000 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1001 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1002 "ENVIRONMENT": lambda self: self.expression( 1003 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1004 ), 1005 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1006 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1007 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1008 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1009 "FREESPACE": lambda self: self._parse_freespace(), 1010 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1011 "HEAP": lambda self: self.expression(exp.HeapProperty), 1012 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1013 "IMMUTABLE": lambda self: self.expression( 1014 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1015 ), 1016 "INHERITS": lambda self: self.expression( 1017 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1018 ), 1019 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1020 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1021 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1022 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1023 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1024 "LIKE": lambda self: self._parse_create_like(), 1025 "LOCATION": lambda self: 
self._parse_property_assignment(exp.LocationProperty), 1026 "LOCK": lambda self: self._parse_locking(), 1027 "LOCKING": lambda self: self._parse_locking(), 1028 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1029 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1030 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1031 "MODIFIES": lambda self: self._parse_modifies_property(), 1032 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1033 "NO": lambda self: self._parse_no_property(), 1034 "ON": lambda self: self._parse_on_property(), 1035 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1036 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1037 "PARTITION": lambda self: self._parse_partitioned_of(), 1038 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1039 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1040 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1041 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1042 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1043 "READS": lambda self: self._parse_reads_property(), 1044 "REMOTE": lambda self: self._parse_remote_with_connection(), 1045 "RETURNS": lambda self: self._parse_returns(), 1046 "STRICT": lambda self: self.expression(exp.StrictProperty), 1047 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1048 "ROW": lambda self: self._parse_row(), 1049 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1050 "SAMPLE": lambda self: self.expression( 1051 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1052 ), 1053 "SECURE": lambda self: self.expression(exp.SecureProperty), 1054 "SECURITY": lambda self: self._parse_security(), 1055 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1056 "SETTINGS": lambda self: 
self._parse_settings_property(), 1057 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1058 "SORTKEY": lambda self: self._parse_sortkey(), 1059 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1060 "STABLE": lambda self: self.expression( 1061 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1062 ), 1063 "STORED": lambda self: self._parse_stored(), 1064 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1065 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1066 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1067 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1068 "TO": lambda self: self._parse_to_table(), 1069 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1070 "TRANSFORM": lambda self: self.expression( 1071 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1072 ), 1073 "TTL": lambda self: self._parse_ttl(), 1074 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1075 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1076 "VOLATILE": lambda self: self._parse_volatile_property(), 1077 "WITH": lambda self: self._parse_with_property(), 1078 } 1079 1080 CONSTRAINT_PARSERS = { 1081 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1082 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1083 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1084 "CHARACTER SET": lambda self: self.expression( 1085 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "CHECK": lambda self: self.expression( 1088 exp.CheckColumnConstraint, 1089 this=self._parse_wrapped(self._parse_assignment), 1090 enforced=self._match_text_seq("ENFORCED"), 1091 ), 1092 "COLLATE": lambda self: self.expression( 1093 exp.CollateColumnConstraint, 1094 this=self._parse_identifier() or self._parse_column(), 
1095 ), 1096 "COMMENT": lambda self: self.expression( 1097 exp.CommentColumnConstraint, this=self._parse_string() 1098 ), 1099 "COMPRESS": lambda self: self._parse_compress(), 1100 "CLUSTERED": lambda self: self.expression( 1101 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1102 ), 1103 "NONCLUSTERED": lambda self: self.expression( 1104 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1105 ), 1106 "DEFAULT": lambda self: self.expression( 1107 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1108 ), 1109 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1110 "EPHEMERAL": lambda self: self.expression( 1111 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 "EXCLUDE": lambda self: self.expression( 1114 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1115 ), 1116 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1117 "FORMAT": lambda self: self.expression( 1118 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1119 ), 1120 "GENERATED": lambda self: self._parse_generated_as_identity(), 1121 "IDENTITY": lambda self: self._parse_auto_increment(), 1122 "INLINE": lambda self: self._parse_inline(), 1123 "LIKE": lambda self: self._parse_create_like(), 1124 "NOT": lambda self: self._parse_not_constraint(), 1125 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1126 "ON": lambda self: ( 1127 self._match(TokenType.UPDATE) 1128 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1129 ) 1130 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1131 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1132 "PERIOD": lambda self: self._parse_period_for_system_time(), 1133 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1134 "REFERENCES": lambda self: self._parse_references(match=False), 1135 "TITLE": 
lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        """
        Parses a BUCKET(..) / TRUNCATE(..) partition transform; the BUCKET or
        TRUNCATE keyword has already been consumed (it is `self._prev`).

        Returns None — after retreating one token — when the keyword is not
        followed by '(', so the caller can re-parse it as a plain identifier.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    # Parsers for ALTER TABLE actions, keyed by the action's leading keyword
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # Parsers for the keyword following ALTER TABLE ... ALTER
    ALTER_ALTER_PARSERS = {
        "DISTKEY":
lambda self: self._parse_alter_diststyle(), 1198 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1199 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1200 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1201 } 1202 1203 SCHEMA_UNNAMED_CONSTRAINTS = { 1204 "CHECK", 1205 "EXCLUDE", 1206 "FOREIGN KEY", 1207 "LIKE", 1208 "PERIOD", 1209 "PRIMARY KEY", 1210 "UNIQUE", 1211 "WATERMARK", 1212 "BUCKET", 1213 "TRUNCATE", 1214 } 1215 1216 NO_PAREN_FUNCTION_PARSERS = { 1217 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1218 "CASE": lambda self: self._parse_case(), 1219 "CONNECT_BY_ROOT": lambda self: self.expression( 1220 exp.ConnectByRoot, this=self._parse_column() 1221 ), 1222 "IF": lambda self: self._parse_if(), 1223 } 1224 1225 INVALID_FUNC_NAME_TOKENS = { 1226 TokenType.IDENTIFIER, 1227 TokenType.STRING, 1228 } 1229 1230 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1231 1232 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1233 1234 FUNCTION_PARSERS = { 1235 **{ 1236 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1237 }, 1238 **{ 1239 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1240 }, 1241 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1242 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1243 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1244 "DECODE": lambda self: self._parse_decode(), 1245 "EXTRACT": lambda self: self._parse_extract(), 1246 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1247 "GAP_FILL": lambda self: self._parse_gap_fill(), 1248 "JSON_OBJECT": lambda self: self._parse_json_object(), 1249 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1250 "JSON_TABLE": lambda self: self._parse_json_table(), 1251 "MATCH": lambda self: self._parse_match_against(), 1252 "NORMALIZE": lambda self: self._parse_normalize(), 1253 "OPENJSON": lambda self: 
self._parse_open_json(), 1254 "OVERLAY": lambda self: self._parse_overlay(), 1255 "POSITION": lambda self: self._parse_position(), 1256 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "STRING_AGG": lambda self: self._parse_string_agg(), 1258 "SUBSTRING": lambda self: self._parse_substring(), 1259 "TRIM": lambda self: self._parse_trim(), 1260 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1261 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1262 "XMLELEMENT": lambda self: self.expression( 1263 exp.XMLElement, 1264 this=self._match_text_seq("NAME") and self._parse_id_var(), 1265 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1266 ), 1267 "XMLTABLE": lambda self: self._parse_xml_table(), 1268 } 1269 1270 QUERY_MODIFIER_PARSERS = { 1271 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1272 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1273 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1274 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1275 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1276 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1277 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1278 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1279 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1280 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1281 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1282 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1283 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1284 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1285 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.CLUSTER_BY: lambda self: ( 1287 
"cluster", 1288 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1289 ), 1290 TokenType.DISTRIBUTE_BY: lambda self: ( 1291 "distribute", 1292 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1293 ), 1294 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1295 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1296 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1297 } 1298 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1299 1300 SET_PARSERS = { 1301 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1302 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1303 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1304 "TRANSACTION": lambda self: self._parse_set_transaction(), 1305 } 1306 1307 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1308 1309 TYPE_LITERAL_PARSERS = { 1310 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1311 } 1312 1313 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1314 1315 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1316 1317 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1318 1319 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1320 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1321 "ISOLATION": ( 1322 ("LEVEL", "REPEATABLE", "READ"), 1323 ("LEVEL", "READ", "COMMITTED"), 1324 ("LEVEL", "READ", "UNCOMITTED"), 1325 ("LEVEL", "SERIALIZABLE"), 1326 ), 1327 "READ": ("WRITE", "ONLY"), 1328 } 1329 1330 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1331 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1332 ) 1333 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1334 1335 CREATE_SEQUENCE: OPTIONS_TYPE = { 1336 "SCALE": ("EXTEND", "NOEXTEND"), 1337 "SHARD": ("EXTEND", "NOEXTEND"), 1338 "NO": ("CYCLE", "CACHE", "MAXVALUE", 
"MINVALUE"), 1339 **dict.fromkeys( 1340 ( 1341 "SESSION", 1342 "GLOBAL", 1343 "KEEP", 1344 "NOKEEP", 1345 "ORDER", 1346 "NOORDER", 1347 "NOCACHE", 1348 "CYCLE", 1349 "NOCYCLE", 1350 "NOMINVALUE", 1351 "NOMAXVALUE", 1352 "NOSCALE", 1353 "NOSHARD", 1354 ), 1355 tuple(), 1356 ), 1357 } 1358 1359 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1360 1361 USABLES: OPTIONS_TYPE = dict.fromkeys( 1362 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1363 ) 1364 1365 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1366 1367 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1368 "TYPE": ("EVOLUTION",), 1369 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1370 } 1371 1372 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1373 1374 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1375 1376 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1377 "NOT": ("ENFORCED",), 1378 "MATCH": ( 1379 "FULL", 1380 "PARTIAL", 1381 "SIMPLE", 1382 ), 1383 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1384 "USING": ( 1385 "BTREE", 1386 "HASH", 1387 ), 1388 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1389 } 1390 1391 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1392 "NO": ("OTHERS",), 1393 "CURRENT": ("ROW",), 1394 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1395 } 1396 1397 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1398 1399 CLONE_KEYWORDS = {"CLONE", "COPY"} 1400 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1401 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1402 1403 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1404 1405 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1406 1407 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1408 1409 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1410 1411 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1412 
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Identifier tokens allowed in a FETCH clause (ROW / ROWS / PERCENT excluded)
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC datetime literal prefixes (d / t / ts) mapped to the expression type they build
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Parsers for ANALYZE sub-statements, keyed by their leading keyword
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
"VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    # Keywords that can introduce a (sub)partition reference — confirm exact usage at use site
    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    # Tokens that could be either an alias or a query modifier (e.g. a column aliased LIMIT)
    # — presumably disambiguated at the use site; not visible here
    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    # Dialect-specific operation-modifier keywords; empty by default
    OPERATION_MODIFIERS: t.Set[str] = set()

    # Accepted kinds for the recursive CTE SEARCH/CYCLE clause — confirm at use site
    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    # Expression types that query modifiers can attach to — see _parse_query_modifiers
    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Whether CAST parses as a strict cast; FUNCTION_PARSERS passes this to _parse_cast
    # (TRY_CAST / SAFE_CAST always pass False)
    STRICT_CAST = True

    # PIVOT-parsing toggles — exact semantics live at their use sites (not visible here)
    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG defaults to the natural logarithm — confirm at use site
    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE, makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The amount of surrounding SQL (in characters) shown
                in error messages (see raise_error).
            max_errors: The maximum number of error messages concatenated into a
                raised ParseError (see check_errors).
            dialect: The dialect (name or instance) to parse for; resolved through
                Dialect.get_or_raise.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all parsing state, so the instance can be reused for another parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1622 """ 1623 return self._parse( 1624 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1625 ) 1626 1627 def parse_into( 1628 self, 1629 expression_types: exp.IntoType, 1630 raw_tokens: t.List[Token], 1631 sql: t.Optional[str] = None, 1632 ) -> t.List[t.Optional[exp.Expression]]: 1633 """ 1634 Parses a list of tokens into a given Expression type. If a collection of Expression 1635 types is given instead, this method will try to parse the token list into each one 1636 of them, stopping at the first for which the parsing succeeds. 1637 1638 Args: 1639 expression_types: The expression type(s) to try and parse the token list into. 1640 raw_tokens: The list of tokens. 1641 sql: The original SQL string, used to produce helpful debug messages. 1642 1643 Returns: 1644 The target Expression. 1645 """ 1646 errors = [] 1647 for expression_type in ensure_list(expression_types): 1648 parser = self.EXPRESSION_PARSERS.get(expression_type) 1649 if not parser: 1650 raise TypeError(f"No parser registered for {expression_type}") 1651 1652 try: 1653 return self._parse(parser, raw_tokens, sql) 1654 except ParseError as e: 1655 e.errors[0]["into_expression"] = expression_type 1656 errors.append(e) 1657 1658 raise ParseError( 1659 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1660 errors=merge_errors(errors), 1661 ) from errors[-1] 1662 1663 def _parse( 1664 self, 1665 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1666 raw_tokens: t.List[Token], 1667 sql: t.Optional[str] = None, 1668 ) -> t.List[t.Optional[exp.Expression]]: 1669 self.reset() 1670 self.sql = sql or "" 1671 1672 total = len(raw_tokens) 1673 chunks: t.List[t.List[Token]] = [[]] 1674 1675 for i, token in enumerate(raw_tokens): 1676 if token.token_type == TokenType.SEMICOLON: 1677 if token.comments: 1678 chunks.append([token]) 1679 1680 if i < total - 1: 1681 chunks.append([]) 1682 else: 1683 chunks[-1].append(token) 1684 1685 expressions = [] 1686 1687 for 
tokens in chunks:
            # _advance() pre-increments, so -1 lands the cursor on the first token
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over mean the statement was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending span in ANSI terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves any comments buffered from the previous token onto the expression
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # NOTE(review): annotated -> bool but the expression may evaluate to
        # None/Token; callers appear to rely on truthiness only — confirm
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Backtracks (or advances) to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks,
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Fall back: wrap the rest of the statement in an opaque `exp.Command` node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force errors to surface as exceptions so the except clause below can catch them
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind -- treat the whole statement as an opaque command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a TO <table> clause into a `ToTableProperty`."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY, SET aggregates)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression optionally followed by DELETE / RECOMPRESS / TO DISK / TO VOLUME
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement: a registered statement, a command, or an expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; falls back to a Command if the object kind is unknown."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; returns a truthy value only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement; falls back to a Command on unsupported syntax."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL [NON]CLUSTERED COLUMNSTORE index flavor
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge any scattered SequenceProperties nodes into a single one
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fallback to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Leftover tokens we don't understand -- bail out to a raw Command
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the object name (Teradata-style)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/object property, trying dialect-registered parsers first."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        # Fall back to a generic key = value property
        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse Hive-style STORED [BY | AS ...] storage clauses."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, converting unquoted identifiers to vars."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse [= | AS] <value> into an instance of `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into one `Properties` node, or None if none matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property vs a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF (...) options."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)."""
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n | AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the many WITH ... property variants across dialects."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
self._retreat(self._index - 1) 2555 return None 2556 2557 return self.expression(exp.CopyGrantsProperty) 2558 2559 def _parse_freespace(self) -> exp.FreespaceProperty: 2560 self._match(TokenType.EQ) 2561 return self.expression( 2562 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2563 ) 2564 2565 def _parse_mergeblockratio( 2566 self, no: bool = False, default: bool = False 2567 ) -> exp.MergeBlockRatioProperty: 2568 if self._match(TokenType.EQ): 2569 return self.expression( 2570 exp.MergeBlockRatioProperty, 2571 this=self._parse_number(), 2572 percent=self._match(TokenType.PERCENT), 2573 ) 2574 2575 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2576 2577 def _parse_datablocksize( 2578 self, 2579 default: t.Optional[bool] = None, 2580 minimum: t.Optional[bool] = None, 2581 maximum: t.Optional[bool] = None, 2582 ) -> exp.DataBlocksizeProperty: 2583 self._match(TokenType.EQ) 2584 size = self._parse_number() 2585 2586 units = None 2587 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2588 units = self._prev.text 2589 2590 return self.expression( 2591 exp.DataBlocksizeProperty, 2592 size=size, 2593 units=units, 2594 default=default, 2595 minimum=minimum, 2596 maximum=maximum, 2597 ) 2598 2599 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2600 self._match(TokenType.EQ) 2601 always = self._match_text_seq("ALWAYS") 2602 manual = self._match_text_seq("MANUAL") 2603 never = self._match_text_seq("NEVER") 2604 default = self._match_text_seq("DEFAULT") 2605 2606 autotemp = None 2607 if self._match_text_seq("AUTOTEMP"): 2608 autotemp = self._parse_schema() 2609 2610 return self.expression( 2611 exp.BlockCompressionProperty, 2612 always=always, 2613 manual=manual, 2614 never=never, 2615 default=default, 2616 autotemp=autotemp, 2617 ) 2618 2619 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2620 index = self._index 2621 no = 
self._match_text_seq("NO") 2622 concurrent = self._match_text_seq("CONCURRENT") 2623 2624 if not self._match_text_seq("ISOLATED", "LOADING"): 2625 self._retreat(index) 2626 return None 2627 2628 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2629 return self.expression( 2630 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2631 ) 2632 2633 def _parse_locking(self) -> exp.LockingProperty: 2634 if self._match(TokenType.TABLE): 2635 kind = "TABLE" 2636 elif self._match(TokenType.VIEW): 2637 kind = "VIEW" 2638 elif self._match(TokenType.ROW): 2639 kind = "ROW" 2640 elif self._match_text_seq("DATABASE"): 2641 kind = "DATABASE" 2642 else: 2643 kind = None 2644 2645 if kind in ("DATABASE", "TABLE", "VIEW"): 2646 this = self._parse_table_parts() 2647 else: 2648 this = None 2649 2650 if self._match(TokenType.FOR): 2651 for_or_in = "FOR" 2652 elif self._match(TokenType.IN): 2653 for_or_in = "IN" 2654 else: 2655 for_or_in = None 2656 2657 if self._match_text_seq("ACCESS"): 2658 lock_type = "ACCESS" 2659 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2660 lock_type = "EXCLUSIVE" 2661 elif self._match_text_seq("SHARE"): 2662 lock_type = "SHARE" 2663 elif self._match_text_seq("READ"): 2664 lock_type = "READ" 2665 elif self._match_text_seq("WRITE"): 2666 lock_type = "WRITE" 2667 elif self._match_text_seq("CHECKSUM"): 2668 lock_type = "CHECKSUM" 2669 else: 2670 lock_type = None 2671 2672 override = self._match_text_seq("OVERRIDE") 2673 2674 return self.expression( 2675 exp.LockingProperty, 2676 this=this, 2677 kind=kind, 2678 for_or_in=for_or_in, 2679 lock_type=lock_type, 2680 override=override, 2681 ) 2682 2683 def _parse_partition_by(self) -> t.List[exp.Expression]: 2684 if self._match(TokenType.PARTITION_BY): 2685 return self._parse_csv(self._parse_assignment) 2686 return [] 2687 2688 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2689 def _parse_partition_bound_expr() -> 
t.Optional[exp.Expression]: 2690 if self._match_text_seq("MINVALUE"): 2691 return exp.var("MINVALUE") 2692 if self._match_text_seq("MAXVALUE"): 2693 return exp.var("MAXVALUE") 2694 return self._parse_bitwise() 2695 2696 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2697 expression = None 2698 from_expressions = None 2699 to_expressions = None 2700 2701 if self._match(TokenType.IN): 2702 this = self._parse_wrapped_csv(self._parse_bitwise) 2703 elif self._match(TokenType.FROM): 2704 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2705 self._match_text_seq("TO") 2706 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 elif self._match_text_seq("WITH", "(", "MODULUS"): 2708 this = self._parse_number() 2709 self._match_text_seq(",", "REMAINDER") 2710 expression = self._parse_number() 2711 self._match_r_paren() 2712 else: 2713 self.raise_error("Failed to parse partition bound spec.") 2714 2715 return self.expression( 2716 exp.PartitionBoundSpec, 2717 this=this, 2718 expression=expression, 2719 from_expressions=from_expressions, 2720 to_expressions=to_expressions, 2721 ) 2722 2723 # https://www.postgresql.org/docs/current/sql-createtable.html 2724 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2725 if not self._match_text_seq("OF"): 2726 self._retreat(self._index - 1) 2727 return None 2728 2729 this = self._parse_table(schema=True) 2730 2731 if self._match(TokenType.DEFAULT): 2732 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2733 elif self._match_text_seq("FOR", "VALUES"): 2734 expression = self._parse_partition_bound_spec() 2735 else: 2736 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2737 2738 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2739 2740 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2741 self._match(TokenType.EQ) 2742 return self.expression( 2743 exp.PartitionedByProperty, 
2744 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2745 ) 2746 2747 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2748 if self._match_text_seq("AND", "STATISTICS"): 2749 statistics = True 2750 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2751 statistics = False 2752 else: 2753 statistics = None 2754 2755 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2756 2757 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2758 if self._match_text_seq("SQL"): 2759 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2760 return None 2761 2762 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2763 if self._match_text_seq("SQL", "DATA"): 2764 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2765 return None 2766 2767 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2768 if self._match_text_seq("PRIMARY", "INDEX"): 2769 return exp.NoPrimaryIndexProperty() 2770 if self._match_text_seq("SQL"): 2771 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2772 return None 2773 2774 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2775 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2776 return exp.OnCommitProperty() 2777 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2778 return exp.OnCommitProperty(delete=True) 2779 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2780 2781 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2782 if self._match_text_seq("SQL", "DATA"): 2783 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2784 return None 2785 2786 def _parse_distkey(self) -> exp.DistKeyProperty: 2787 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2788 2789 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2790 table = 
self._parse_table(schema=True) 2791 2792 options = [] 2793 while self._match_texts(("INCLUDING", "EXCLUDING")): 2794 this = self._prev.text.upper() 2795 2796 id_var = self._parse_id_var() 2797 if not id_var: 2798 return None 2799 2800 options.append( 2801 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2802 ) 2803 2804 return self.expression(exp.LikeProperty, this=table, expressions=options) 2805 2806 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2807 return self.expression( 2808 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2809 ) 2810 2811 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2812 self._match(TokenType.EQ) 2813 return self.expression( 2814 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2815 ) 2816 2817 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2818 self._match_text_seq("WITH", "CONNECTION") 2819 return self.expression( 2820 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2821 ) 2822 2823 def _parse_returns(self) -> exp.ReturnsProperty: 2824 value: t.Optional[exp.Expression] 2825 null = None 2826 is_table = self._match(TokenType.TABLE) 2827 2828 if is_table: 2829 if self._match(TokenType.LT): 2830 value = self.expression( 2831 exp.Schema, 2832 this="TABLE", 2833 expressions=self._parse_csv(self._parse_struct_types), 2834 ) 2835 if not self._match(TokenType.GT): 2836 self.raise_error("Expecting >") 2837 else: 2838 value = self._parse_schema(exp.var("TABLE")) 2839 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2840 null = True 2841 value = None 2842 else: 2843 value = self._parse_types() 2844 2845 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2846 2847 def _parse_describe(self) -> exp.Describe: 2848 kind = self._match_set(self.CREATABLES) and self._prev.text 2849 style = 
        if self._match(TokenType.DOT):
            # A dot follows the matched style token, so it was presumably part of a
            # dotted table name rather than a style keyword -- undo the match
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multitable INSERT (e.g. INSERT ALL/FIRST ... SELECT ...).

        The ALL/FIRST token was already consumed; its text becomes `kind`.
        """
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # [WHEN <cond> THEN | ELSE] INTO <table> [VALUES ...]
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (the INSERT token was already consumed).

        Dispatches to _parse_multitable_inserts when FIRST/ALL follows, and supports
        INSERT ... DIRECTORY, INSERT OR <alternative>, and INSERT INTO FUNCTION forms.
        """
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING <exprs> [INTO <target>] clause."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (the ROW token was already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (<props>); retreats and returns None on no match."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            # Undo the optional WITH consumption since this isn't SERDEPROPERTIES
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive-style ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clause.

        Args:
            match_row: when True, require (and consume) the leading ROW FORMAT tokens.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and order-sensitive
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; falls back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (the DELETE token was already consumed)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (the UPDATE token was already consumed)."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <target>."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")
        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single ('key' = 'value') pair is supported here
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION/SUBPARTITION (<assignments>)."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row, either parenthesized or a bare expression."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # DEFAULT is allowed as a value placeholder in some dialects
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the contents of a parenthesized query: pivot/unpivot, FROM-first
        (duckdb) syntax, or a nested select/table."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from"):
                    select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT query, optionally consuming a trailing |> pipe-syntax chain."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                # Pipe syntax with a bare leading FROM: synthesize SELECT * FROM ...
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse the core of a SELECT-like query: WITH, SELECT, wrapped selects,
        VALUES, bare FROM (duckdb), SUMMARIZE, DESCRIBE and STREAM forms."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                # A following dot means ALL/DISTINCT would be part of a dotted name
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM wasn't followed by a function call -- give the token back
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH BREADTH/DEPTH FIRST BY ... [SET ...] [USING ...] on a recursive CTE."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: <alias> [NOT MATERIALIZED | MATERIALIZED] AS (<stmt>)."""
        index = self._index
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            # Not actually a CTE -- give back everything consumed so far
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # Wrap bare VALUES in a SELECT * so the CTE body is a full query
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, with an optional column list: [AS] name [(cols)]."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery, consuming pivots, alias and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined tables that reference earlier sources into explicit UNNESTs."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and the registered query modifiers (WHERE, GROUP BY, ...)
        to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)
            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline OFFSET; hoist it to its own node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT ... BY expressions belong on the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume the remaining tokens and return them verbatim as a single hint string."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Parse one function-style hint; dialects may override this hook."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a hint body as CSV of function calls/vars, falling back to a raw string."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            # Either parsing failed or tokens remain -- treat the whole body as text
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in a comment attached to a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` means FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw pattern tokens, balancing parentheses manually
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] [OUTER] ..., CROSS APPLY ... or OUTER APPLY ...."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply is a tri-state: True = CROSS APPLY, False = OUTER APPLY,
            # None = plain LATERAL (or no match)
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each optional."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            # Unwrap a plain Column back to its Identifier
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
        self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause (including comma cross joins and APPLY)."""
        if self._match(TokenType.COMMA):
            # A comma in the FROM clause acts as a cross join; parsing the table
            # may fail (e.g. trailing comma), in which case no join is produced.
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            # No JOIN keyword followed the method/side/kind tokens: rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Greedily parse nested joins so a trailing ON/USING can attach to
            # this (outer) join; rewind if neither follows.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        # Bubble up comments attached to the join keyword tokens.
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an optional operator-class suffix after an index expression."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter section of an index definition."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into an exp.Index node.

        When `index` is given (or `anonymous` is set) the index name was
        already consumed, so only the target table remains to be parsed.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints (T-SQL WITH (...) or MySQL index hints)."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint =
                exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table reference."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # The last part parsed is actually the database, not a table.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a full table factor: lateral, unnest, VALUES, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            # In these dialects the sample appears before the alias.
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a TIMESTAMP/VERSION snapshot clause into exp.Version."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse an AT/BEFORE time-travel clause into exp.HistoricalData."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete clause: rewind and report no match.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a CHANGES (INFORMATION => ...) clause into exp.Changes."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # In column-only dialects the parsed alias actually names a column.
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE (or USING SAMPLE) clause into exp.TableSample."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else
self._parse_primary() or self._parse_placeholder() 4309 ) 4310 4311 if self._match_text_seq("BUCKET"): 4312 bucket_numerator = self._parse_number() 4313 self._match_text_seq("OUT", "OF") 4314 bucket_denominator = bucket_denominator = self._parse_number() 4315 self._match(TokenType.ON) 4316 bucket_field = self._parse_field() 4317 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4318 percent = num 4319 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4320 size = num 4321 else: 4322 percent = num 4323 4324 if matched_l_paren: 4325 self._match_r_paren() 4326 4327 if self._match(TokenType.L_PAREN): 4328 method = self._parse_var(upper=True) 4329 seed = self._match(TokenType.COMMA) and self._parse_number() 4330 self._match_r_paren() 4331 elif self._match_texts(("SEED", "REPEATABLE")): 4332 seed = self._parse_wrapped(self._parse_number) 4333 4334 if not method and self.DEFAULT_SAMPLING_METHOD: 4335 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4336 4337 return self.expression( 4338 exp.TableSample, 4339 expressions=expressions, 4340 method=method, 4341 bucket_numerator=bucket_numerator, 4342 bucket_denominator=bucket_denominator, 4343 bucket_field=bucket_field, 4344 percent=percent, 4345 size=size, 4346 seed=seed, 4347 ) 4348 4349 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4350 return list(iter(self._parse_pivot, None)) or None 4351 4352 def _parse_joins(self) -> t.Iterator[exp.Join]: 4353 return iter(self._parse_join, None) 4354 4355 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4356 if not self._match(TokenType.INTO): 4357 return None 4358 4359 return self.expression( 4360 exp.UnpivotColumns, 4361 this=self._match_text_seq("NAME") and self._parse_column(), 4362 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4363 ) 4364 4365 # https://duckdb.org/docs/sql/statements/pivot 4366 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> 
exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            # One term of the simplified pivot's ON list.
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the IN (...) part of a PIVOT's FOR clause."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally aliased) aggregation of a PIVOT's list."""
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                # Trailing comma in the aggregation list: no more entries.
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause, inferring output column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions =
pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the given aggregations (unnamed ones are skipped)."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a PREWHERE clause into exp.PreWhere."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into exp.Where."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause including ROLLUP/CUBE/GROUPING SETS/TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                # Only WITH itself (or nothing) was consumed: put it back and stop.
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a CUBE/ROLLUP node; a WITH prefix means no argument list follows."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return
self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause into exp.Having."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause into exp.Qualify."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse a CONNECT BY condition, temporarily treating PRIOR as a parser."""
        # Register PRIOR only for the duration of this parse, then remove it.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse START WITH ... CONNECT BY (in either order) into exp.Connect."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        # START WITH may also follow CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<identifier> [AS <expr>]`, yielding an Alias when AS follows."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list; None when absent."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this:
t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY into exp.Order; else return `this`."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause started by `token` into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: ASC/DESC, NULLS ordering and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't say, derive NULLS placement from the dialect's
        # null-ordering semantics.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        """Parse trailing FETCH/TOP options: PERCENT, ROW(S) [ONLY], WITH TIES."""
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH into exp.Limit/exp.Fetch; else return `this`."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT <offset>, <count>`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; return `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Check whether LIMIT/OFFSET follows, without consuming any tokens."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a BY <expr-list> suffix for LIMIT/OFFSET; falsy when absent."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE locking clauses."""
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one UNION/EXCEPT/INTERSECT continuation of `this`; None if absent."""
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly fold set operations onto `this` until none remain."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Greedily fold chained set operations onto `this`, then (for dialects
        with MODIFIERS_ATTACHED_TO_SET_OP) hoist trailing query modifiers from
        the right-most operand up to the set operation itself."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        # Move the modifier from the operand to the set op node.
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-level expressions (e.g. `x := <expr>`), the top of
        the precedence-climbing chain."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                # Unwrap a bare single-part column to its identifier for the LHS.
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-level expressions."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-level expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level expressions (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level expressions (<, >, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates on `this`: dialect RANGE_PARSERS entries
        (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL shorthands and IS predicates,
        honoring a leading NOT."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a parsed range predicate in NOT; passes None through untouched."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: IS [NOT] DISTINCT FROM,
        IS [NOT] JSON [...], or IS [NOT] <primary|NULL>. Retreats and returns
        None when nothing valid follows (e.g. `IS` was a false match)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
bool = False) -> exp.In: 5042 unnest = self._parse_unnest(with_alias=False) 5043 if unnest: 5044 this = self.expression(exp.In, this=this, unnest=unnest) 5045 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5046 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5047 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5048 5049 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5050 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5051 else: 5052 this = self.expression(exp.In, this=this, expressions=expressions) 5053 5054 if matched_l_paren: 5055 self._match_r_paren(this) 5056 elif not self._match(TokenType.R_BRACKET, expression=this): 5057 self.raise_error("Expecting ]") 5058 else: 5059 this = self.expression(exp.In, this=this, field=self._parse_column()) 5060 5061 return this 5062 5063 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5064 symmetric = None 5065 if self._match_text_seq("SYMMETRIC"): 5066 symmetric = True 5067 elif self._match_text_seq("ASYMMETRIC"): 5068 symmetric = False 5069 5070 low = self._parse_bitwise() 5071 self._match(TokenType.AND) 5072 high = self._parse_bitwise() 5073 5074 return self.expression( 5075 exp.Between, 5076 this=this, 5077 low=low, 5078 high=high, 5079 symmetric=symmetric, 5080 ) 5081 5082 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5083 if not self._match(TokenType.ESCAPE): 5084 return this 5085 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5086 5087 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5088 index = self._index 5089 5090 if not self._match(TokenType.INTERVAL) and match_interval: 5091 return None 5092 5093 if self._match(TokenType.STRING, advance=False): 5094 this = self._parse_primary() 5095 else: 5096 this = self._parse_term() 5097 5098 if not this or ( 
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression into the canonical `INTERVAL '<n>' <UNIT>`
        form, optionally folding `INTERVAL 'a' u1 [+] 'b' u2 ...` into a sum.

        When `match_interval` is False the leading INTERVAL keyword is optional
        (used for the continuation terms of a sum). Returns None (cursor
        restored) if no interval can be parsed here.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `IS` after INTERVAL means this wasn't an interval at all
            # (e.g. `interval IS NULL` where `interval` is an identifier).
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, plus dialect-specific `||`
        string concatenation, `??` coalescing and `<<`/`>>` shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM set), with special handling to
        normalize COLLATE operands."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR set), tagging Div nodes
        with the dialect's division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-form DIV with no RHS was a false match (e.g. an alias
                # named "div") -> back off and stop.
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (dialects with EXPONENT set)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions
        and AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), an
        inline type constructor, or fall back to a column/identifier."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    # Literal carries a timezone -> widen to TIMESTAMPTZ.
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data-type parameter (e.g. the 38 in DECIMAL(38, 0)),
        upper-casing bare column names into vars."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a UDT DataType from a (possibly dotted) type name starting at
        `identifier`."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, covering nested/struct/enum/aggregate types,
        MAP[k => v], timestamp timezone modifiers, INTERVAL units, UNSIGNED,
        LIST and fixed-size array suffixes.

        `check_func` guards against consuming function-call syntax as a type;
        `schema` toggles schema-definition context (affects array values);
        `allow_identifiers` allows resolving identifiers to type tokens.
        Returns None (cursor restored) when no type can be parsed.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: it may spell a known type name.
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # ClickHouse-style Nullable(T) collapses onto the inner type.
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so this was a function call
                # (e.g. DATE(...)), not a typed literal -> undo everything.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_vector_expressions(
        self, expressions: t.List[exp.Expression]
    ) -> t.List[exp.Expression]:
        """Normalize VECTOR(<type>, <dim>) params: build the element type from
        its name, keep the remaining params as-is."""
        return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type definition: `<name>[:] <type> ...`,
        falling back to a bare type when `type_required` and no field name fits."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # Optional name/type separator, e.g. DuckDB STRUCT(a: INT)
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an `AT TIME ZONE <zone>` modifier to `this` when present."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference with trailing column
        operators, plus Oracle-style (+) join markers where supported."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference; wraps an Identifier into a Column
        node, hoisting its comments."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # `VALUES` not followed by ( is an identifier here, not the keyword.
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:a.b::type` VARIANT extraction
        into a JSONExtract, re-applying any `::` casts around the whole extract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the raw SQL of the path segment, excluding the `::` tail.
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a `::` cast."""
        return self._parse_types()
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: dialect COLUMN_OPERATORS
        (casts, JSON extraction, ...), dot-qualification chains and brackets."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: table -> db, db -> catalog, new field
                # becomes the column name.
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized construct: a subquery, a tuple (including the
        empty tuple `()`), or a simple parenthesized expression."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression (literals etc. via PRIMARY_PARSERS),
        folding adjacent string literals into a Concat, and handling leading-dot
        number literals like `.5`."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn <function>}` syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        """Parse a comma-separated list of function arguments (each may be a lambda)."""
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: no-paren function parsers, no-paren functions, dialect
        FUNCTION_PARSERS, subquery predicates (EXISTS/ANY/...), known function
        builders from `functions` (defaults to self.FUNCTIONS), and finally an
        exp.Anonymous fallback. Returns None if the upcoming tokens cannot start
        a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Some builders accept the active dialect as a keyword argument
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to coerce positional args into PropertyEQ; identity by default."""
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ, etc.) into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key side is a bare identifier
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse the body of a user-defined function (a full statement by default)."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name + optional type)."""
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: qualified name plus an optional wrapped parameter list."""
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'abc'`); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `<kind>.<name>`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter (an identifier by default)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression like `(x, y) -> ...` or `x -> ...`.

        If no lambda arrow follows the candidate parameter list, the parser backtracks
        and parses a DISTINCT clause or a plain select/expression instead, followed by
        any trailing IGNORE/RESPECT NULLS, HAVING MAX, ORDER and LIMIT modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a wrapped schema definition `(col [type] [constraint], ...)` into exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition: any-token field name followed by a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a column definition: type, FOR ORDINALITY, computed-column (ALIAS /
        MATERIALIZED / AS ...) clauses, and any trailing column constraints.

        Returns `this` unchanged when neither a type nor constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # ClickHouse-style computed column: [ALIAS | MATERIALIZED] <expr>
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            # `<type> AS (<expr>) [STORED | VIRTUAL]` computed column
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTOINCREMENT options: `(start, increment)` or
        `START <n> INCREMENT <n> [ORDER | NOORDER]`; plain AUTOINCREMENT otherwise.
        """
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `AUTO REFRESH <value>`; backtrack if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse `GENERATED {ALWAYS | BY DEFAULT} AS ...` column constraints.

        Handles `AS ROW {START | END} [HIDDEN]`, `AS IDENTITY (...)` with
        START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE options, and
        non-identity generated expressions.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an `INLINE [LENGTH] <expr>` column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse constraints introduced by NOT: NULL, CASESPECIFIC, FOR REPLICATION.

        Backtracks over the already-consumed NOT token when none of them follow.
        """
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Avoid mistaking a T-SQL procedure WITH-option for a WITH constraint keyword
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; named (CONSTRAINT <name> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Greedily parse consecutive unnamed constraints (or constraint-like functions)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint whose keyword is in `constraints`
        (defaults to CONSTRAINT_PARSERS); quoted identifiers never start one.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the optional name of a UNIQUE key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY | INDEX] constraint with its columns and options."""
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options as canonical strings.

        Handles `ON <event> {NO ACTION | CASCADE | RESTRICT | SET NULL | SET DEFAULT}`
        plus any keyword options from KEY_CONSTRAINT_OPTIONS; stops at the first
        token that is neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; if `match`, the REFERENCES keyword is required."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES clause, and
        `ON {DELETE | UPDATE} <action>` options.
        """
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word actions such as CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one PRIMARY KEY column, allowing an ordering suffix (ASC/DESC)."""
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse `PERIOD FOR SYSTEM_TIME (start_col, end_col)`; backtrack otherwise."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY: as a column constraint when no wrapped column list
        follows, otherwise as a table-level exp.PrimaryKey with its columns.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )
    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces: an assignment with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` following `this`: struct literals, ODBC datetime
        literals, array constructors, or subscript access (exp.Bracket). Recurses to
        support chained subscripts.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            # Peek at the token before the bracket to detect a MAP { ... } literal
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix after a bracket element."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN branches, optional ELSE."""
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `ELSE interval` followed by END parses END as the interval unit; recover
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call `IF(cond, true[, false])` or the statement-like
        `IF cond THEN ... [ELSE ...] END` form.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse `NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]`; backtrack otherwise."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<unit> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(... AS <type> [FORMAT ...] [DEFAULT ... ON CONVERSION ERROR]).

        A FORMAT clause on a temporal target type is canonicalized into
        StrToDate/StrToTime instead of a plain cast.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(<expr>, '<type string>') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG / GROUP_CONCAT variants into exp.GroupConcat,
        including DISTINCT, ON OVERFLOW behavior, trailing ORDER BY / LIMIT, and
        WITHIN GROUP (ORDER BY ...).
        """
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> [USING <charset> | , <type>]) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE(...): optional XMLNAMESPACES, row pattern string, PASSING
        columns, RETURNING SEQUENCE BY REF, and a COLUMNS clause.
        """
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse the comma-separated namespace list of an XMLNAMESPACES clause."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """Parse DECODE: fewer than 3 args is charset decoding (exp.Decode), otherwise
        the Oracle-style CASE-like form (exp.DecodeCase).
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a JSON `[KEY] <key> {: | VALUE} <value>` pair."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when a FORMAT JSON suffix follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse JSON ON-condition handlers (ON EMPTY / ON ERROR / ON NULL)."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )
JSON_EXISTS) 6783 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6784 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6785 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6786 else: 6787 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6788 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6789 6790 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6791 6792 if not empty and not error and not null: 6793 return None 6794 6795 return self.expression( 6796 exp.OnCondition, 6797 empty=empty, 6798 error=error, 6799 null=null, 6800 ) 6801 6802 def _parse_on_handling( 6803 self, on: str, *values: str 6804 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6805 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6806 for value in values: 6807 if self._match_text_seq(value, "ON", on): 6808 return f"{value} ON {on}" 6809 6810 index = self._index 6811 if self._match(TokenType.DEFAULT): 6812 default_value = self._parse_bitwise() 6813 if self._match_text_seq("ON", on): 6814 return default_value 6815 6816 self._retreat(index) 6817 6818 return None 6819 6820 @t.overload 6821 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6822 6823 @t.overload 6824 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6825 6826 def _parse_json_object(self, agg=False): 6827 star = self._parse_star() 6828 expressions = ( 6829 [star] 6830 if star 6831 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6832 ) 6833 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6834 6835 unique_keys = None 6836 if self._match_text_seq("WITH", "UNIQUE"): 6837 unique_keys = True 6838 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6839 unique_keys = False 6840 6841 self._match_text_seq("KEYS") 6842 6843 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6844 self._parse_type() 6845 ) 6846 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6847 6848 return self.expression( 6849 exp.JSONObjectAgg if agg else exp.JSONObject, 6850 expressions=expressions, 6851 null_handling=null_handling, 6852 unique_keys=unique_keys, 6853 return_type=return_type, 6854 encoding=encoding, 6855 ) 6856 6857 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6858 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6859 if not self._match_text_seq("NESTED"): 6860 this = self._parse_id_var() 6861 kind = self._parse_types(allow_identifiers=False) 6862 nested = None 6863 else: 6864 this = None 6865 kind = None 6866 nested = True 6867 6868 path = self._match_text_seq("PATH") and self._parse_string() 6869 nested_schema = nested and self._parse_json_schema() 6870 6871 return self.expression( 6872 exp.JSONColumnDef, 6873 this=this, 6874 kind=kind, 6875 path=path, 6876 nested_schema=nested_schema, 6877 ) 6878 6879 def _parse_json_schema(self) -> exp.JSONSchema: 6880 self._match_text_seq("COLUMNS") 6881 return self.expression( 6882 exp.JSONSchema, 6883 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6884 ) 6885 6886 def _parse_json_table(self) -> exp.JSONTable: 6887 this = self._parse_format_json(self._parse_bitwise()) 6888 path = self._match(TokenType.COMMA) and 
self._parse_string() 6889 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6890 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6891 schema = self._parse_json_schema() 6892 6893 return exp.JSONTable( 6894 this=this, 6895 schema=schema, 6896 path=path, 6897 error_handling=error_handling, 6898 empty_handling=empty_handling, 6899 ) 6900 6901 def _parse_match_against(self) -> exp.MatchAgainst: 6902 if self._match_text_seq("TABLE"): 6903 # parse SingleStore MATCH(TABLE ...) syntax 6904 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6905 expressions = [] 6906 table = self._parse_table() 6907 if table: 6908 expressions = [table] 6909 else: 6910 expressions = self._parse_csv(self._parse_column) 6911 6912 self._match_text_seq(")", "AGAINST", "(") 6913 6914 this = self._parse_string() 6915 6916 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6917 modifier = "IN NATURAL LANGUAGE MODE" 6918 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6919 modifier = f"{modifier} WITH QUERY EXPANSION" 6920 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6921 modifier = "IN BOOLEAN MODE" 6922 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6923 modifier = "WITH QUERY EXPANSION" 6924 else: 6925 modifier = None 6926 6927 return self.expression( 6928 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6929 ) 6930 6931 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6932 def _parse_open_json(self) -> exp.OpenJSON: 6933 this = self._parse_bitwise() 6934 path = self._match(TokenType.COMMA) and self._parse_string() 6935 6936 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6937 this = self._parse_field(any_token=True) 6938 kind = self._parse_types() 6939 path = self._parse_string() 6940 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6941 6942 return self.expression( 6943 
exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6944 ) 6945 6946 expressions = None 6947 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6948 self._match_l_paren() 6949 expressions = self._parse_csv(_parse_open_json_column_def) 6950 6951 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6952 6953 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6954 args = self._parse_csv(self._parse_bitwise) 6955 6956 if self._match(TokenType.IN): 6957 return self.expression( 6958 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6959 ) 6960 6961 if haystack_first: 6962 haystack = seq_get(args, 0) 6963 needle = seq_get(args, 1) 6964 else: 6965 haystack = seq_get(args, 1) 6966 needle = seq_get(args, 0) 6967 6968 return self.expression( 6969 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6970 ) 6971 6972 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6973 args = self._parse_csv(self._parse_table) 6974 return exp.JoinHint(this=func_name.upper(), expressions=args) 6975 6976 def _parse_substring(self) -> exp.Substring: 6977 # Postgres supports the form: substring(string [from int] [for int]) 6978 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6979 6980 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6981 6982 if self._match(TokenType.FROM): 6983 args.append(self._parse_bitwise()) 6984 if self._match(TokenType.FOR): 6985 if len(args) == 1: 6986 args.append(exp.Literal.number(1)) 6987 args.append(self._parse_bitwise()) 6988 6989 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6990 6991 def _parse_trim(self) -> exp.Trim: 6992 # https://www.w3resource.com/sql/character-functions/trim.php 6993 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6994 6995 position = None 6996 collation = None 6997 expression = None 6998 6999 if 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of an expression: WITHIN GROUP,
        FILTER (...), IGNORE/RESPECT NULLS, and OVER (...) / named-window
        references. When `alias` is set, parses a named window definition
        (e.g. `WINDOW w AS (...)`) instead of an OVER clause.

        Returns `this` unchanged when no window syntax follows.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # FILTER (WHERE ...) aggregate filter clause
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the
                # aggregate's arguments to wrap the aggregate itself
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows, so there is no window clause
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments are moved onto the Window expression below
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE [BETWEEN] <spec> [AND <spec>] [EXCLUDE <option>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
and column.comments: 7210 this.comments = column.pop_comments() 7211 7212 return this 7213 7214 def _parse_id_var( 7215 self, 7216 any_token: bool = True, 7217 tokens: t.Optional[t.Collection[TokenType]] = None, 7218 ) -> t.Optional[exp.Expression]: 7219 expression = self._parse_identifier() 7220 if not expression and ( 7221 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7222 ): 7223 quoted = self._prev.token_type == TokenType.STRING 7224 expression = self._identifier_expression(quoted=quoted) 7225 7226 return expression 7227 7228 def _parse_string(self) -> t.Optional[exp.Expression]: 7229 if self._match_set(self.STRING_PARSERS): 7230 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7231 return self._parse_placeholder() 7232 7233 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7234 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7235 if output: 7236 output.update_positions(self._prev) 7237 return output 7238 7239 def _parse_number(self) -> t.Optional[exp.Expression]: 7240 if self._match_set(self.NUMERIC_PARSERS): 7241 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7242 return self._parse_placeholder() 7243 7244 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7245 if self._match(TokenType.IDENTIFIER): 7246 return self._identifier_expression(quoted=True) 7247 return self._parse_placeholder() 7248 7249 def _parse_var( 7250 self, 7251 any_token: bool = False, 7252 tokens: t.Optional[t.Collection[TokenType]] = None, 7253 upper: bool = False, 7254 ) -> t.Optional[exp.Expression]: 7255 if ( 7256 (any_token and self._advance_any()) 7257 or self._match(TokenType.VAR) 7258 or (self._match_set(tokens) if tokens else False) 7259 ): 7260 return self.expression( 7261 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7262 ) 7263 return self._parse_placeholder() 7264 7265 def _advance_any(self, ignore_reserved: 
bool = False) -> t.Optional[Token]: 7266 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7267 self._advance() 7268 return self._prev 7269 return None 7270 7271 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7272 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7273 7274 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7275 return self._parse_primary() or self._parse_var(any_token=True) 7276 7277 def _parse_null(self) -> t.Optional[exp.Expression]: 7278 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7279 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7280 return self._parse_placeholder() 7281 7282 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7283 if self._match(TokenType.TRUE): 7284 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7285 if self._match(TokenType.FALSE): 7286 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7287 return self._parse_placeholder() 7288 7289 def _parse_star(self) -> t.Optional[exp.Expression]: 7290 if self._match(TokenType.STAR): 7291 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7292 return self._parse_placeholder() 7293 7294 def _parse_parameter(self) -> exp.Parameter: 7295 this = self._parse_identifier() or self._parse_primary_or_var() 7296 return self.expression(exp.Parameter, this=this) 7297 7298 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7299 if self._match_set(self.PLACEHOLDER_PARSERS): 7300 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7301 if placeholder: 7302 return placeholder 7303 self._advance(-1) 7304 return None 7305 7306 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7307 if not self._match_texts(keywords): 7308 return None 7309 if self._match(TokenType.L_PAREN, advance=False): 7310 return self._parse_wrapped_csv(self._parse_expression) 7311 7312 expression = 
self._parse_alias(self._parse_assignment(), explicit=True) 7313 return [expression] if expression else None 7314 7315 def _parse_csv( 7316 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7317 ) -> t.List[exp.Expression]: 7318 parse_result = parse_method() 7319 items = [parse_result] if parse_result is not None else [] 7320 7321 while self._match(sep): 7322 self._add_comments(parse_result) 7323 parse_result = parse_method() 7324 if parse_result is not None: 7325 items.append(parse_result) 7326 7327 return items 7328 7329 def _parse_tokens( 7330 self, parse_method: t.Callable, expressions: t.Dict 7331 ) -> t.Optional[exp.Expression]: 7332 this = parse_method() 7333 7334 while self._match_set(expressions): 7335 this = self.expression( 7336 expressions[self._prev.token_type], 7337 this=this, 7338 comments=self._prev_comments, 7339 expression=parse_method(), 7340 ) 7341 7342 return this 7343 7344 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7345 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7346 7347 def _parse_wrapped_csv( 7348 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7349 ) -> t.List[exp.Expression]: 7350 return self._parse_wrapped( 7351 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7352 ) 7353 7354 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7355 wrapped = self._match(TokenType.L_PAREN) 7356 if not wrapped and not optional: 7357 self.raise_error("Expecting (") 7358 parse_result = parse_method() 7359 if wrapped: 7360 self._match_r_paren() 7361 return parse_result 7362 7363 def _parse_expressions(self) -> t.List[exp.Expression]: 7364 return self._parse_csv(self._parse_expression) 7365 7366 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7367 return ( 7368 self._parse_set_operations( 7369 self._parse_alias(self._parse_assignment(), 
explicit=True) 7370 if alias 7371 else self._parse_assignment() 7372 ) 7373 or self._parse_select() 7374 ) 7375 7376 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7377 return self._parse_query_modifiers( 7378 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7379 ) 7380 7381 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7382 this = None 7383 if self._match_texts(self.TRANSACTION_KIND): 7384 this = self._prev.text 7385 7386 self._match_texts(("TRANSACTION", "WORK")) 7387 7388 modes = [] 7389 while True: 7390 mode = [] 7391 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7392 mode.append(self._prev.text) 7393 7394 if mode: 7395 modes.append(" ".join(mode)) 7396 if not self._match(TokenType.COMMA): 7397 break 7398 7399 return self.expression(exp.Transaction, this=this, modes=modes) 7400 7401 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7402 chain = None 7403 savepoint = None 7404 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7405 7406 self._match_texts(("TRANSACTION", "WORK")) 7407 7408 if self._match_text_seq("TO"): 7409 self._match_text_seq("SAVEPOINT") 7410 savepoint = self._parse_id_var() 7411 7412 if self._match(TokenType.AND): 7413 chain = not self._match_text_seq("NO") 7414 self._match_text_seq("CHAIN") 7415 7416 if is_rollback: 7417 return self.expression(exp.Rollback, savepoint=savepoint) 7418 7419 return self.expression(exp.Commit, chain=chain) 7420 7421 def _parse_refresh(self) -> exp.Refresh: 7422 self._match(TokenType.TABLE) 7423 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7424 7425 def _parse_column_def_with_exists(self): 7426 start = self._index 7427 self._match(TokenType.COLUMN) 7428 7429 exists_column = self._parse_exists(not_=True) 7430 expression = self._parse_field_def() 7431 7432 if not isinstance(expression, exp.ColumnDef): 7433 self._retreat(start) 7434 return None 7435 7436 
expression.set("exists", exists_column) 7437 7438 return expression 7439 7440 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7441 if not self._prev.text.upper() == "ADD": 7442 return None 7443 7444 expression = self._parse_column_def_with_exists() 7445 if not expression: 7446 return None 7447 7448 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7449 if self._match_texts(("FIRST", "AFTER")): 7450 position = self._prev.text 7451 column_position = self.expression( 7452 exp.ColumnPosition, this=self._parse_column(), position=position 7453 ) 7454 expression.set("position", column_position) 7455 7456 return expression 7457 7458 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7459 drop = self._match(TokenType.DROP) and self._parse_drop() 7460 if drop and not isinstance(drop, exp.Command): 7461 drop.set("kind", drop.args.get("kind", "COLUMN")) 7462 return drop 7463 7464 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7465 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7466 return self.expression( 7467 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7468 ) 7469 7470 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7471 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7472 self._match_text_seq("ADD") 7473 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7474 return self.expression( 7475 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7476 ) 7477 7478 column_def = self._parse_add_column() 7479 if isinstance(column_def, exp.ColumnDef): 7480 return column_def 7481 7482 exists = self._parse_exists(not_=True) 7483 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7484 return self.expression( 7485 exp.AddPartition, 7486 exists=exists, 7487 this=self._parse_field(any_token=True), 7488 
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER action.

        Dispatches to a dialect-specific sub-parser when the next keyword has
        one registered in ALTER_ALTER_PARSERS; otherwise parses the common
        ALTER [COLUMN] <col> ... variants (DROP/SET DEFAULT, COMMENT,
        DROP/SET NOT NULL, SET [IN]VISIBLE, and SET DATA TYPE).
        """
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallthrough: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse an ALTER TABLE ... SET action.

        Each branch consumes one of the mutually-exclusive SET variants and
        stores the parsed value on a single AlterSet expression; the final
        else-branch handles [SERDE <name>] followed by an optional wrapped
        property list. Branch order matters: each `_match*` call advances
        the token cursor on success.
        """
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement.

        Falls back to a generic Command when the altered object kind is not
        in ALTERABLES, when no registered action parser matches, or when the
        action parser fails to consume the whole statement.
        """
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        # ALTER SESSION has no target table, so skip the table-specific parts
        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        # The keyword just consumed selects the action parser (e.g. ADD, DROP, SET)
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an Alter node if the whole statement was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
7698 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7699 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7700 else: 7701 options.append(self._prev.text.upper()) 7702 7703 this: t.Optional[exp.Expression] = None 7704 inner_expression: t.Optional[exp.Expression] = None 7705 7706 kind = self._curr and self._curr.text.upper() 7707 7708 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7709 this = self._parse_table_parts() 7710 elif self._match_text_seq("TABLES"): 7711 if self._match_set((TokenType.FROM, TokenType.IN)): 7712 kind = f"{kind} {self._prev.text.upper()}" 7713 this = self._parse_table(schema=True, is_db_reference=True) 7714 elif self._match_text_seq("DATABASE"): 7715 this = self._parse_table(schema=True, is_db_reference=True) 7716 elif self._match_text_seq("CLUSTER"): 7717 this = self._parse_table() 7718 # Try matching inner expr keywords before fallback to parse table. 7719 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7720 kind = None 7721 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7722 else: 7723 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7724 kind = None 7725 this = self._parse_table_parts() 7726 7727 partition = self._try_parse(self._parse_partition) 7728 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7729 return self._parse_as_command(start) 7730 7731 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7732 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7733 "WITH", "ASYNC", "MODE" 7734 ): 7735 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7736 else: 7737 mode = None 7738 7739 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7740 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7741 7742 properties = self._parse_properties() 7743 return self.expression( 7744 exp.Analyze, 7745 kind=kind, 7746 this=this, 7747 
mode=mode, 7748 partition=partition, 7749 properties=properties, 7750 expression=inner_expression, 7751 options=options, 7752 ) 7753 7754 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7755 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7756 this = None 7757 kind = self._prev.text.upper() 7758 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7759 expressions = [] 7760 7761 if not self._match_text_seq("STATISTICS"): 7762 self.raise_error("Expecting token STATISTICS") 7763 7764 if self._match_text_seq("NOSCAN"): 7765 this = "NOSCAN" 7766 elif self._match(TokenType.FOR): 7767 if self._match_text_seq("ALL", "COLUMNS"): 7768 this = "FOR ALL COLUMNS" 7769 if self._match_texts("COLUMNS"): 7770 this = "FOR COLUMNS" 7771 expressions = self._parse_csv(self._parse_column_reference) 7772 elif self._match_text_seq("SAMPLE"): 7773 sample = self._parse_number() 7774 expressions = [ 7775 self.expression( 7776 exp.AnalyzeSample, 7777 sample=sample, 7778 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7779 ) 7780 ] 7781 7782 return self.expression( 7783 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7784 ) 7785 7786 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7787 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7788 kind = None 7789 this = None 7790 expression: t.Optional[exp.Expression] = None 7791 if self._match_text_seq("REF", "UPDATE"): 7792 kind = "REF" 7793 this = "UPDATE" 7794 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7795 this = "UPDATE SET DANGLING TO NULL" 7796 elif self._match_text_seq("STRUCTURE"): 7797 kind = "STRUCTURE" 7798 if self._match_text_seq("CASCADE", "FAST"): 7799 this = "CASCADE FAST" 7800 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7801 ("ONLINE", "OFFLINE") 7802 ): 7803 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7804 
        expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse an `<ESTIMATE|COMPUTE|...> COLUMNS`-style ANALYZE sub-clause; the
        leading keyword has already been consumed and sits in ``self._prev``."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse `DELETE [SYSTEM] STATISTICS`; returns None if STATISTICS is absent."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse `LIST CHAINED ROWS [INTO ...]` (Oracle ANALYZE)."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse MySQL/StarRocks `UPDATE|DROP HISTOGRAM ON cols [WITH ...]` clauses."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    # Peek at MODE without consuming, then advance past it explicitly
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # MySQL: WITH <n> BUCKETS
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse `MERGE [INTO] target [alias] USING source ON cond WHEN ... [RETURNING ...]`."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses
        of a MERGE statement into an exp.Whens wrapper."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False when neither is given
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    # INSERT * (e.g. Databricks)
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's registered SHOW_PARSERS; unknown forms
        fall back to an opaque exp.Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement.

        Returns None (with the cursor restored) when no assignment is present.
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the rhs are treated as plain values, not column refs
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL|SESSION] TRANSACTION <characteristics>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Dispatch one SET item to a registered parser, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; falls back to exp.Command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option from `options` into an exp.Var.

        `options` maps a leading keyword to its allowed continuations. An empty
        continuation sequence means the keyword stands alone; an unknown keyword
        either raises or returns None depending on `raise_unmatched`.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: fail unless the keyword needs none
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into an opaque exp.Command,
        warning that the syntax is unsupported."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword so Command(this=keyword, expression=rest)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: `THIS(kind(key value, ...))`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                # Stop when neither a key nor a value could be parsed
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `THIS(MIN x MAX y)` or `THIS(y)` (min defaults to 0).

        NOTE(review): the locals `min`/`max` shadow the builtins; harmless here
        since the builtins are not used afterwards in this scope.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr IN iterator [IF condition]` (array comprehension tail)."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Retreat one extra token to also give back the token before `expression`
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, either as a single HEREDOC_STRING token or as a
        `$[tag]$ ... $[tag]$` token sequence scanned manually."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening delimiter tokens must be adjacent in the raw SQL
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags,
                advance=False):
                # Found the closing delimiter: everything between start and here is the body
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against `trie` and return the parser
        registered in `parsers` for the matched keyword sequence, or None
        (cursor restored) when nothing matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token matches
        `token_type`; otherwise return None (falsy by design)."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach the matched token's comments to `expression`, if given
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like `_match`, but accepts any token type in the collection `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types; advances past both when `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening parenthesis, raising a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing parenthesis, raising a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against the collection `texts`.

        NOTE(review): callers must pass a tuple/set/dict, not a bare string —
        `in` on a string matches substrings, so e.g. a token "COL" would match
        the argument "COLUMNS". String tokens never match (STRING is excluded).
        """
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match an exact sequence of upper-cased keyword texts; on failure the
        cursor is fully restored. With advance=False it only peeks."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references in a lambda body that name lambda parameters,
        replacing them with identifiers/dots and casting when the parameter is typed."""
        if not node:
            return node

        # Map parameter name -> declared type (or False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain that wraps this column, if any
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] with Postgres/ClickHouse extensions,
        distinguishing it from the TRUNCATE(number, decimals) function."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered opclass expression optionally followed by `WITH <op>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] ( option [option ...] )` property lists (Snowflake/T-SQL)."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts
    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement into exp.CopyParameter nodes."""
        # Some dialects (COPY_PARAMS_ARE_CSV) separate options with commas
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse cloud-credential clauses of COPY (Snowflake/Redshift style)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a single file location in a COPY statement (overridable hook)."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] ... FROM/TO ...; falls back to exp.Command when
        tokens remain unconsumed."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY FROM (load), False for COPY TO (unload)
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        if self._match(TokenType.EQ, advance=False):
            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
            # list via `_parse_wrapped(..)` below.
            self._advance(-1)
            files = []

        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(string [, form])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR(expr [, decimals]) [TO unit]."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` with optional EXCEPT/EXCLUDE, REPLACE, RENAME modifiers, or the
        BigQuery `*COLUMNS(...)` unpacking form."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege (possibly multi-word) with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee: `[ROLE|GROUP] name`; None when no identifier follows."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        """Parse the shared `privileges ON [kind] securable` prefix of GRANT/REVOKE."""
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT; falls back to exp.Command for unsupported tails."""
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        """Parse REVOKE; falls back to exp.Command for unsupported tails."""
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(expr PLACING expr FROM expr [FOR expr])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        """Parse the value of a FORMAT_NAME option."""
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        """Parse MAX_BY/MIN_BY style aggregates: ([DISTINCT] value, key [, count])."""
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        """Build an exp.Identifier from `token` (default: the previous token),
        carrying over the token's source positions."""
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        """Wrap `query` in a CTE and return a SELECT of `expressions` from it.

        Used by the pipe-syntax (`|>`) parsers; generates `__tmp<N>` names
        unless an explicit alias is supplied. Existing WITH clauses on `query`
        are hoisted onto the new outer SELECT.
        """
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        """Handle the `|> SELECT ...` pipe operator."""
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query
        # Replace the projections with the piped SELECT's and re-wrap in a CTE
        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handle `|> LIMIT ... [OFFSET ...]`: keep the smallest limit seen and
        accumulate offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            # Only tighten the limit; a larger one cannot widen the result
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parse one AGGREGATE field with optional alias and ASC/DESC ordering."""
        this = self._parse_assignment()
        # Stop before "GROUP AND ORDER BY" so the caller can handle it
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Apply AGGREGATE projections (and optional GROUP BY / ORDER BY) to `query`."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias, but project the aliased expression
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            # Group by the aliases where present, else the expressions themselves
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handle `|> AGGREGATE ... [GROUP [AND ORDER] BY ...]`."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> UNION/EXCEPT/INTERSECT (query)[, (query) ...]`; None when the
        upcoming tokens are not a set operation."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            # Each operand is a parenthesized query; unwrap the Subquery node
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        # Detach `query` from the parsed set op; it is re-attached as a CTE below
        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> JOIN ...`; None when no join can be parsed."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handle `|> PIVOT/UNPIVOT ...` by attaching pivots to the FROM table."""
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        """Handle `|> EXTEND expr, ...`: append expressions after `*`."""
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        """Handle `|> TABLESAMPLE ...`, attaching the sample to the last CTE's
        query when one exists, else to `query` itself."""
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Apply a chain of `|>` pipe operators to `query`."""
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
8777 parsed_query = self._parse_pipe_syntax_set_operator(query) 8778 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8779 if not parsed_query: 8780 self._retreat(start) 8781 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8782 break 8783 query = parsed_query 8784 else: 8785 query = parser(self, query) 8786 8787 return query 8788 8789 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8790 vars = self._parse_csv(self._parse_id_var) 8791 if not vars: 8792 return None 8793 8794 return self.expression( 8795 exp.DeclareItem, 8796 this=vars, 8797 kind=self._parse_types(), 8798 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8799 ) 8800 8801 def _parse_declare(self) -> exp.Declare | exp.Command: 8802 start = self._prev 8803 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8804 8805 if not expressions or self._curr: 8806 return self._parse_as_command(start) 8807 8808 return self.expression(exp.Declare, expressions=expressions) 8809 8810 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8811 exp_class = exp.Cast if strict else exp.TryCast 8812 8813 if exp_class == exp.TryCast: 8814 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8815 8816 return self.expression(exp_class, **kwargs) 8817 8818 def _parse_json_value(self) -> exp.JSONValue: 8819 this = self._parse_bitwise() 8820 self._match(TokenType.COMMA) 8821 path = self._parse_bitwise() 8822 8823 returning = self._match(TokenType.RETURNING) and self._parse_type() 8824 8825 return self.expression( 8826 exp.JSONValue, 8827 this=this, 8828 path=self.dialect.to_json_path(path), 8829 returning=returning, 8830 on_condition=self._parse_on_condition(), 8831 ) 8832 8833 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8834 def concat_exprs( 8835 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8836 ) -> exp.Expression: 8837 if isinstance(node, exp.Distinct) and 
len(node.expressions) > 1: 8838 concat_exprs = [ 8839 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8840 ] 8841 node.set("expressions", concat_exprs) 8842 return node 8843 if len(exprs) == 1: 8844 return exprs[0] 8845 return self.expression(exp.Concat, expressions=args, safe=True) 8846 8847 args = self._parse_csv(self._parse_lambda) 8848 8849 if args: 8850 order = args[-1] if isinstance(args[-1], exp.Order) else None 8851 8852 if order: 8853 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8854 # remove 'expr' from exp.Order and add it back to args 8855 args[-1] = order.this 8856 order.set("this", concat_exprs(order.this, args)) 8857 8858 this = order or concat_exprs(args[0], args) 8859 else: 8860 this = None 8861 8862 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8863 8864 return self.expression(exp.GroupConcat, this=this, separator=separator)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone `*` argument, otherwise a VarMap whose keys and
    values come from the alternating key/value argument list."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    pair_starts = range(0, len(args), 2)
    keys = [args[i] for i in pair_starts]
    values = [args[i + 1] for i in pair_starts]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callable that builds `expr_type` nodes.

    With `reverse_args=True` the parsed right-hand side becomes `this` and the
    incoming expression becomes `expression`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        right = self._parse_bitwise()
        left = this
        if reverse_args:
            left, right = right, left
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from LOG(...) args, honoring the dialect's argument order."""
    # Default argument order is base, expression
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument LOG: some dialects define it as the natural logarithm.
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)
    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON-extraction functions of the given `expr_type`."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # Only exp.JSONExtract carries trailing (variadic) arguments.
        trailing = args[2:]
        if trailing and expr_type is exp.JSONExtract:
            node.set("expressions", trailing)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands to preserve precedence."""

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap the operand if it is a binary node, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an ARRAY/LIST constructor node, recording bracket notation for
    dialects that distinguish ARRAY[...] from ARRAY(...)."""
    constructed = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        constructed.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return constructed
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; in the two-argument form the source timezone
    falls back to `default_source_tz` when one is provided."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: 
exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 
361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 
TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 
TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *ALTERABLES, 574 *CREATABLES, 575 *SUBQUERY_PREDICATES, 576 *TYPE_TOKENS, 577 *NO_PAREN_FUNCTIONS, 578 } 579 ID_VAR_TOKENS.remove(TokenType.UNION) 580 581 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 582 TokenType.ANTI, 583 TokenType.ASOF, 584 TokenType.FULL, 585 TokenType.LEFT, 586 TokenType.LOCK, 587 TokenType.NATURAL, 588 TokenType.RIGHT, 589 TokenType.SEMI, 590 TokenType.WINDOW, 591 } 592 593 ALIAS_TOKENS = ID_VAR_TOKENS 594 595 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 596 597 ARRAY_CONSTRUCTORS = { 598 "ARRAY": exp.Array, 599 "LIST": exp.List, 600 } 601 602 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 603 604 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 605 606 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 607 608 FUNC_TOKENS = { 609 TokenType.COLLATE, 610 TokenType.COMMAND, 611 TokenType.CURRENT_DATE, 612 TokenType.CURRENT_DATETIME, 613 TokenType.CURRENT_SCHEMA, 614 TokenType.CURRENT_TIMESTAMP, 615 TokenType.CURRENT_TIME, 616 TokenType.CURRENT_USER, 617 TokenType.FILTER, 618 TokenType.FIRST, 619 TokenType.FORMAT, 620 TokenType.GET, 621 TokenType.GLOB, 622 
TokenType.IDENTIFIER, 623 TokenType.INDEX, 624 TokenType.ISNULL, 625 TokenType.ILIKE, 626 TokenType.INSERT, 627 TokenType.LIKE, 628 TokenType.MERGE, 629 TokenType.NEXT, 630 TokenType.OFFSET, 631 TokenType.PRIMARY_KEY, 632 TokenType.RANGE, 633 TokenType.REPLACE, 634 TokenType.RLIKE, 635 TokenType.ROW, 636 TokenType.UNNEST, 637 TokenType.VAR, 638 TokenType.LEFT, 639 TokenType.RIGHT, 640 TokenType.SEQUENCE, 641 TokenType.DATE, 642 TokenType.DATETIME, 643 TokenType.TABLE, 644 TokenType.TIMESTAMP, 645 TokenType.TIMESTAMPTZ, 646 TokenType.TRUNCATE, 647 TokenType.UTC_DATE, 648 TokenType.UTC_TIME, 649 TokenType.UTC_TIMESTAMP, 650 TokenType.WINDOW, 651 TokenType.XOR, 652 *TYPE_TOKENS, 653 *SUBQUERY_PREDICATES, 654 } 655 656 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.AND: exp.And, 658 } 659 660 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.COLON_EQ: exp.PropertyEQ, 662 } 663 664 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 665 TokenType.OR: exp.Or, 666 } 667 668 EQUALITY = { 669 TokenType.EQ: exp.EQ, 670 TokenType.NEQ: exp.NEQ, 671 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 672 } 673 674 COMPARISON = { 675 TokenType.GT: exp.GT, 676 TokenType.GTE: exp.GTE, 677 TokenType.LT: exp.LT, 678 TokenType.LTE: exp.LTE, 679 } 680 681 BITWISE = { 682 TokenType.AMP: exp.BitwiseAnd, 683 TokenType.CARET: exp.BitwiseXor, 684 TokenType.PIPE: exp.BitwiseOr, 685 } 686 687 TERM = { 688 TokenType.DASH: exp.Sub, 689 TokenType.PLUS: exp.Add, 690 TokenType.MOD: exp.Mod, 691 TokenType.COLLATE: exp.Collate, 692 } 693 694 FACTOR = { 695 TokenType.DIV: exp.IntDiv, 696 TokenType.LR_ARROW: exp.Distance, 697 TokenType.SLASH: exp.Div, 698 TokenType.STAR: exp.Mul, 699 } 700 701 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 702 703 TIMES = { 704 TokenType.TIME, 705 TokenType.TIMETZ, 706 } 707 708 TIMESTAMPS = { 709 TokenType.TIMESTAMP, 710 TokenType.TIMESTAMPNTZ, 711 TokenType.TIMESTAMPTZ, 712 TokenType.TIMESTAMPLTZ, 713 *TIMES, 714 
} 715 716 SET_OPERATIONS = { 717 TokenType.UNION, 718 TokenType.INTERSECT, 719 TokenType.EXCEPT, 720 } 721 722 JOIN_METHODS = { 723 TokenType.ASOF, 724 TokenType.NATURAL, 725 TokenType.POSITIONAL, 726 } 727 728 JOIN_SIDES = { 729 TokenType.LEFT, 730 TokenType.RIGHT, 731 TokenType.FULL, 732 } 733 734 JOIN_KINDS = { 735 TokenType.ANTI, 736 TokenType.CROSS, 737 TokenType.INNER, 738 TokenType.OUTER, 739 TokenType.SEMI, 740 TokenType.STRAIGHT_JOIN, 741 } 742 743 JOIN_HINTS: t.Set[str] = set() 744 745 LAMBDAS = { 746 TokenType.ARROW: lambda self, expressions: self.expression( 747 exp.Lambda, 748 this=self._replace_lambda( 749 self._parse_assignment(), 750 expressions, 751 ), 752 expressions=expressions, 753 ), 754 TokenType.FARROW: lambda self, expressions: self.expression( 755 exp.Kwarg, 756 this=exp.var(expressions[0].name), 757 expression=self._parse_assignment(), 758 ), 759 } 760 761 COLUMN_OPERATORS = { 762 TokenType.DOT: None, 763 TokenType.DOTCOLON: lambda self, this, to: self.expression( 764 exp.JSONCast, 765 this=this, 766 to=to, 767 ), 768 TokenType.DCOLON: lambda self, this, to: self.build_cast( 769 strict=self.STRICT_CAST, this=this, to=to 770 ), 771 TokenType.ARROW: lambda self, this, path: self.expression( 772 exp.JSONExtract, 773 this=this, 774 expression=self.dialect.to_json_path(path), 775 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 776 ), 777 TokenType.DARROW: lambda self, this, path: self.expression( 778 exp.JSONExtractScalar, 779 this=this, 780 expression=self.dialect.to_json_path(path), 781 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 782 ), 783 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtract, 785 this=this, 786 expression=path, 787 ), 788 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 789 exp.JSONBExtractScalar, 790 this=this, 791 expression=path, 792 ), 793 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 794 exp.JSONBContains, 795 this=this, 796 expression=key, 
797 ), 798 } 799 800 CAST_COLUMN_OPERATORS = { 801 TokenType.DOTCOLON, 802 TokenType.DCOLON, 803 } 804 805 EXPRESSION_PARSERS = { 806 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 807 exp.Column: lambda self: self._parse_column(), 808 exp.Condition: lambda self: self._parse_assignment(), 809 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 810 exp.Expression: lambda self: self._parse_expression(), 811 exp.From: lambda self: self._parse_from(joins=True), 812 exp.Group: lambda self: self._parse_group(), 813 exp.Having: lambda self: self._parse_having(), 814 exp.Hint: lambda self: self._parse_hint_body(), 815 exp.Identifier: lambda self: self._parse_id_var(), 816 exp.Join: lambda self: self._parse_join(), 817 exp.Lambda: lambda self: self._parse_lambda(), 818 exp.Lateral: lambda self: self._parse_lateral(), 819 exp.Limit: lambda self: self._parse_limit(), 820 exp.Offset: lambda self: self._parse_offset(), 821 exp.Order: lambda self: self._parse_order(), 822 exp.Ordered: lambda self: self._parse_ordered(), 823 exp.Properties: lambda self: self._parse_properties(), 824 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 825 exp.Qualify: lambda self: self._parse_qualify(), 826 exp.Returning: lambda self: self._parse_returning(), 827 exp.Select: lambda self: self._parse_select(), 828 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 829 exp.Table: lambda self: self._parse_table_parts(), 830 exp.TableAlias: lambda self: self._parse_table_alias(), 831 exp.Tuple: lambda self: self._parse_value(values=False), 832 exp.Whens: lambda self: self._parse_when_matched(), 833 exp.Where: lambda self: self._parse_where(), 834 exp.Window: lambda self: self._parse_named_window(), 835 exp.With: lambda self: self._parse_with(), 836 "JOIN_TYPE": lambda self: self._parse_join_parts(), 837 } 838 839 STATEMENT_PARSERS = { 840 TokenType.ALTER: lambda self: self._parse_alter(), 841 
TokenType.ANALYZE: lambda self: self._parse_analyze(), 842 TokenType.BEGIN: lambda self: self._parse_transaction(), 843 TokenType.CACHE: lambda self: self._parse_cache(), 844 TokenType.COMMENT: lambda self: self._parse_comment(), 845 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 846 TokenType.COPY: lambda self: self._parse_copy(), 847 TokenType.CREATE: lambda self: self._parse_create(), 848 TokenType.DELETE: lambda self: self._parse_delete(), 849 TokenType.DESC: lambda self: self._parse_describe(), 850 TokenType.DESCRIBE: lambda self: self._parse_describe(), 851 TokenType.DROP: lambda self: self._parse_drop(), 852 TokenType.GRANT: lambda self: self._parse_grant(), 853 TokenType.REVOKE: lambda self: self._parse_revoke(), 854 TokenType.INSERT: lambda self: self._parse_insert(), 855 TokenType.KILL: lambda self: self._parse_kill(), 856 TokenType.LOAD: lambda self: self._parse_load(), 857 TokenType.MERGE: lambda self: self._parse_merge(), 858 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 859 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 860 TokenType.REFRESH: lambda self: self._parse_refresh(), 861 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 862 TokenType.SET: lambda self: self._parse_set(), 863 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 864 TokenType.UNCACHE: lambda self: self._parse_uncache(), 865 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 866 TokenType.UPDATE: lambda self: self._parse_update(), 867 TokenType.USE: lambda self: self._parse_use(), 868 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 869 } 870 871 UNARY_PARSERS = { 872 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 873 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 874 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 875 TokenType.DASH: 
lambda self: self.expression(exp.Neg, this=self._parse_unary()), 876 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 877 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 878 } 879 880 STRING_PARSERS = { 881 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 882 exp.RawString, this=token.text 883 ), 884 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 885 exp.National, this=token.text 886 ), 887 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 888 TokenType.STRING: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=True 890 ), 891 TokenType.UNICODE_STRING: lambda self, token: self.expression( 892 exp.UnicodeString, 893 this=token.text, 894 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 895 ), 896 } 897 898 NUMERIC_PARSERS = { 899 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 900 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 901 TokenType.HEX_STRING: lambda self, token: self.expression( 902 exp.HexString, 903 this=token.text, 904 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 905 ), 906 TokenType.NUMBER: lambda self, token: self.expression( 907 exp.Literal, this=token.text, is_string=False 908 ), 909 } 910 911 PRIMARY_PARSERS = { 912 **STRING_PARSERS, 913 **NUMERIC_PARSERS, 914 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 915 TokenType.NULL: lambda self, _: self.expression(exp.Null), 916 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 917 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 918 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 919 TokenType.STAR: lambda self, _: self._parse_star_ops(), 920 } 921 922 PLACEHOLDER_PARSERS = { 923 TokenType.PLACEHOLDER: 
lambda self: self.expression(exp.Placeholder), 924 TokenType.PARAMETER: lambda self: self._parse_parameter(), 925 TokenType.COLON: lambda self: ( 926 self.expression(exp.Placeholder, this=self._prev.text) 927 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 928 else None 929 ), 930 } 931 932 RANGE_PARSERS = { 933 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 934 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 935 TokenType.GLOB: binary_range_parser(exp.Glob), 936 TokenType.ILIKE: binary_range_parser(exp.ILike), 937 TokenType.IN: lambda self, this: self._parse_in(this), 938 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 939 TokenType.IS: lambda self, this: self._parse_is(this), 940 TokenType.LIKE: binary_range_parser(exp.Like), 941 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 942 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 943 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 944 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 945 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 946 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 947 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 948 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 949 } 950 951 PIPE_SYNTAX_TRANSFORM_PARSERS = { 952 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 953 "AS": lambda self, query: self._build_pipe_cte( 954 query, [exp.Star()], self._parse_table_alias() 955 ), 956 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 957 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 958 "ORDER BY": lambda self, query: query.order_by( 959 self._parse_order(), append=False, copy=False 960 ), 961 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 962 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 963 "TABLESAMPLE": lambda self, 
query: self._parse_pipe_syntax_tablesample(query), 964 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 966 } 967 968 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 969 "ALLOWED_VALUES": lambda self: self.expression( 970 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 971 ), 972 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 973 "AUTO": lambda self: self._parse_auto_property(), 974 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 975 "BACKUP": lambda self: self.expression( 976 exp.BackupProperty, this=self._parse_var(any_token=True) 977 ), 978 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 979 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 980 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 981 "CHECKSUM": lambda self: self._parse_checksum(), 982 "CLUSTER BY": lambda self: self._parse_cluster(), 983 "CLUSTERED": lambda self: self._parse_clustered_by(), 984 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 985 exp.CollateProperty, **kwargs 986 ), 987 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 988 "CONTAINS": lambda self: self._parse_contains_property(), 989 "COPY": lambda self: self._parse_copy_property(), 990 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 991 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 992 "DEFINER": lambda self: self._parse_definer(), 993 "DETERMINISTIC": lambda self: self.expression( 994 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 995 ), 996 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 997 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 998 "DYNAMIC": lambda self: 
self.expression(exp.DynamicProperty), 999 "DISTKEY": lambda self: self._parse_distkey(), 1000 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1001 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1002 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1003 "ENVIRONMENT": lambda self: self.expression( 1004 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1005 ), 1006 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1007 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1008 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1009 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1010 "FREESPACE": lambda self: self._parse_freespace(), 1011 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1012 "HEAP": lambda self: self.expression(exp.HeapProperty), 1013 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1014 "IMMUTABLE": lambda self: self.expression( 1015 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1016 ), 1017 "INHERITS": lambda self: self.expression( 1018 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1019 ), 1020 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1021 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1022 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1023 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1024 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1025 "LIKE": lambda self: self._parse_create_like(), 1026 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1027 "LOCK": lambda self: self._parse_locking(), 1028 "LOCKING": lambda self: self._parse_locking(), 1029 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1030 "MATERIALIZED": 
lambda self: self.expression(exp.MaterializedProperty), 1031 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1032 "MODIFIES": lambda self: self._parse_modifies_property(), 1033 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1034 "NO": lambda self: self._parse_no_property(), 1035 "ON": lambda self: self._parse_on_property(), 1036 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1037 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1038 "PARTITION": lambda self: self._parse_partitioned_of(), 1039 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1040 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1043 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1044 "READS": lambda self: self._parse_reads_property(), 1045 "REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 
"STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 
exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 "EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: 
self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1158 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1159 self._retreat(self._index - 1) 1160 return None 1161 1162 klass = ( 1163 exp.PartitionedByBucket 1164 if self._prev.text.upper() == "BUCKET" 1165 else exp.PartitionByTruncate 1166 ) 1167 1168 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1169 this, expression = seq_get(args, 0), seq_get(args, 1) 1170 1171 if isinstance(this, exp.Literal): 1172 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1173 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1174 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1175 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1176 # 1177 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1178 # Trino ref: 
https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1179 this, expression = expression, this 1180 1181 return self.expression(klass, this=this, expression=expression) 1182 1183 ALTER_PARSERS = { 1184 "ADD": lambda self: self._parse_alter_table_add(), 1185 "AS": lambda self: self._parse_select(), 1186 "ALTER": lambda self: self._parse_alter_table_alter(), 1187 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1188 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1189 "DROP": lambda self: self._parse_alter_table_drop(), 1190 "RENAME": lambda self: self._parse_alter_table_rename(), 1191 "SET": lambda self: self._parse_alter_table_set(), 1192 "SWAP": lambda self: self.expression( 1193 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1194 ), 1195 } 1196 1197 ALTER_ALTER_PARSERS = { 1198 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1199 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1200 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1201 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1202 } 1203 1204 SCHEMA_UNNAMED_CONSTRAINTS = { 1205 "CHECK", 1206 "EXCLUDE", 1207 "FOREIGN KEY", 1208 "LIKE", 1209 "PERIOD", 1210 "PRIMARY KEY", 1211 "UNIQUE", 1212 "WATERMARK", 1213 "BUCKET", 1214 "TRUNCATE", 1215 } 1216 1217 NO_PAREN_FUNCTION_PARSERS = { 1218 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1219 "CASE": lambda self: self._parse_case(), 1220 "CONNECT_BY_ROOT": lambda self: self.expression( 1221 exp.ConnectByRoot, this=self._parse_column() 1222 ), 1223 "IF": lambda self: self._parse_if(), 1224 } 1225 1226 INVALID_FUNC_NAME_TOKENS = { 1227 TokenType.IDENTIFIER, 1228 TokenType.STRING, 1229 } 1230 1231 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1232 1233 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1234 1235 FUNCTION_PARSERS = { 1236 **{ 1237 name: lambda self: 
self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1238 }, 1239 **{ 1240 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1241 }, 1242 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1243 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1244 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1245 "DECODE": lambda self: self._parse_decode(), 1246 "EXTRACT": lambda self: self._parse_extract(), 1247 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 
TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: 
t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1315 1316 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1317 1318 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1319 1320 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1321 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1322 "ISOLATION": ( 1323 ("LEVEL", "REPEATABLE", "READ"), 1324 ("LEVEL", "READ", "COMMITTED"), 1325 ("LEVEL", "READ", "UNCOMITTED"), 1326 ("LEVEL", "SERIALIZABLE"), 1327 ), 1328 "READ": ("WRITE", "ONLY"), 1329 } 1330 1331 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1332 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1333 ) 1334 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1335 1336 CREATE_SEQUENCE: OPTIONS_TYPE = { 1337 "SCALE": ("EXTEND", "NOEXTEND"), 1338 "SHARD": ("EXTEND", "NOEXTEND"), 1339 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1340 **dict.fromkeys( 1341 ( 1342 "SESSION", 1343 "GLOBAL", 1344 "KEEP", 1345 "NOKEEP", 1346 "ORDER", 1347 "NOORDER", 1348 "NOCACHE", 1349 "CYCLE", 1350 "NOCYCLE", 1351 "NOMINVALUE", 1352 "NOMAXVALUE", 1353 "NOSCALE", 1354 "NOSHARD", 1355 ), 1356 tuple(), 1357 ), 1358 } 1359 1360 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1361 1362 USABLES: OPTIONS_TYPE = dict.fromkeys( 1363 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1364 ) 1365 1366 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1367 1368 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1369 "TYPE": ("EVOLUTION",), 1370 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1371 } 1372 1373 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1374 1375 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1376 1377 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1378 "NOT": ("ENFORCED",), 1379 "MATCH": ( 1380 "FULL", 1381 "PARTIAL", 1382 "SIMPLE", 1383 ), 1384 "INITIALLY": ("DEFERRED", 
"IMMEDIATE"), 1385 "USING": ( 1386 "BTREE", 1387 "HASH", 1388 ), 1389 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1390 } 1391 1392 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1393 "NO": ("OTHERS",), 1394 "CURRENT": ("ROW",), 1395 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1396 } 1397 1398 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1399 1400 CLONE_KEYWORDS = {"CLONE", "COPY"} 1401 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1402 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1403 1404 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1405 1406 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1407 1408 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1409 1410 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1411 1412 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1413 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1414 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1415 1416 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1417 1418 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1419 1420 ADD_CONSTRAINT_TOKENS = { 1421 TokenType.CONSTRAINT, 1422 TokenType.FOREIGN_KEY, 1423 TokenType.INDEX, 1424 TokenType.KEY, 1425 TokenType.PRIMARY_KEY, 1426 TokenType.UNIQUE, 1427 } 1428 1429 DISTINCT_TOKENS = {TokenType.DISTINCT} 1430 1431 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1432 1433 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1434 1435 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1436 1437 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1438 1439 ODBC_DATETIME_LITERALS = { 1440 "d": exp.Date, 1441 "t": exp.Time, 1442 "ts": exp.Timestamp, 1443 } 1444 1445 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1446 1447 
    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Keyword dispatch for the expression that follows ANALYZE
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Passed to _parse_cast/_parse_convert for CAST/CONVERT (TRY_CAST variants pass False)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE, makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Initializes the parser.

        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of surrounding SQL shown in error messages.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: The dialect (or dialect name) to resolve via Dialect.get_or_raise.
        """
        # Imported here to avoid a circular import between parser and dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so the parser instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1] 1663 1664 def _parse( 1665 self, 1666 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1667 raw_tokens: t.List[Token], 1668 sql: t.Optional[str] = None, 1669 ) -> t.List[t.Optional[exp.Expression]]: 1670 self.reset() 1671 self.sql = sql or "" 1672 1673 total = len(raw_tokens) 1674 chunks: t.List[t.List[Token]] = [[]] 1675 1676 for i, token in enumerate(raw_tokens): 1677 if token.token_type == TokenType.SEMICOLON: 1678 if token.comments: 1679 chunks.append([token]) 1680 1681 if i < total - 1: 1682 chunks.append([]) 1683 else: 1684 chunks[-1].append(token) 1685 1686 expressions = [] 1687 1688 for tokens in chunks: 1689 self._index = -1 1690 self._tokens = tokens 1691 self._advance() 1692 1693 expressions.append(parse_method(self)) 1694 1695 if self._index < len(self._tokens): 1696 self.raise_error("Invalid expression / Unexpected token") 1697 1698 self.check_errors() 1699 1700 return expressions 1701 1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 ) 1712 1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 
1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error) 1740 1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance) 1758 1759 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1760 if expression and self._prev_comments: 1761 expression.add_comments(self._prev_comments) 1762 self._prev_comments = None 1763 1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 
1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression 1780 1781 def _find_sql(self, start: Token, end: Token) -> str: 1782 return self.sql[start.start : end.end + 1] 1783 1784 def _is_connected(self) -> bool: 1785 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1786 1787 def _advance(self, times: int = 1) -> None: 1788 self._index += times 1789 self._curr = seq_get(self._tokens, self._index) 1790 self._next = seq_get(self._tokens, self._index + 1) 1791 1792 if self._index > 0: 1793 self._prev = self._tokens[self._index - 1] 1794 self._prev_comments = self._prev.comments 1795 else: 1796 self._prev = None 1797 self._prev_comments = None 1798 1799 def _retreat(self, index: int) -> None: 1800 if index != self._index: 1801 self._advance(index - self._index) 1802 1803 def _warn_unsupported(self) -> None: 1804 if len(self._tokens) <= 1: 1805 return 1806 1807 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1808 # interested in emitting a warning for the one being currently processed. 1809 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1810 1811 logger.warning( 1812 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1813 ) 1814 1815 def _parse_command(self) -> exp.Command: 1816 self._warn_unsupported() 1817 return self.expression( 1818 exp.Command, 1819 comments=self._prev_comments, 1820 this=self._prev.text.upper(), 1821 expression=self._parse_string(), 1822 ) 1823 1824 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1825 """ 1826 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force immediate raising so a failed parse surfaces as ParseError here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Backtrack on failure (or when explicitly requested) and restore the level
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <name> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Not a creatable kind we recognize - fall back to an opaque command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action
            # (DELETE, RECOMPRESS, TO DISK, TO VOLUME)
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            # Capture the comments before dispatching, since parsing advances the stream
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        # Fall back to parsing a bare expression or SELECT, plus any trailing modifiers
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            # Unknown DROP target - fall back to an opaque command
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
1947 if_exists = exists or self._parse_exists() 1948 1949 if kind == "COLUMN": 1950 this = self._parse_column() 1951 else: 1952 this = self._parse_table_parts( 1953 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1954 ) 1955 1956 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1957 1958 if self._match(TokenType.L_PAREN, advance=False): 1959 expressions = self._parse_wrapped_csv(self._parse_types) 1960 else: 1961 expressions = None 1962 1963 return self.expression( 1964 exp.Drop, 1965 exists=if_exists, 1966 this=this, 1967 expressions=expressions, 1968 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1969 temporary=temporary, 1970 materialized=materialized, 1971 cascade=self._match_text_seq("CASCADE"), 1972 constraints=self._match_text_seq("CONSTRAINTS"), 1973 purge=self._match_text_seq("PURGE"), 1974 cluster=cluster, 1975 concurrently=concurrently, 1976 ) 1977 1978 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1979 return ( 1980 self._match_text_seq("IF") 1981 and (not not_ or self._match(TokenType.NOT)) 1982 and self._match(TokenType.EXISTS) 1983 ) 1984 1985 def _parse_create(self) -> exp.Create | exp.Command: 1986 # Note: this can't be None because we've matched a statement parser 1987 start = self._prev 1988 1989 replace = ( 1990 start.token_type == TokenType.REPLACE 1991 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1992 or self._match_pair(TokenType.OR, TokenType.ALTER) 1993 ) 1994 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1995 1996 unique = self._match(TokenType.UNIQUE) 1997 1998 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1999 clustered = True 2000 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2001 "COLUMNSTORE" 2002 ): 2003 clustered = False 2004 else: 2005 clustered = None 2006 2007 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2008 self._advance() 2009 2010 properties = None 2011 
create_token = self._match_set(self.CREATABLES) and self._prev 2012 2013 if not create_token: 2014 # exp.Properties.Location.POST_CREATE 2015 properties = self._parse_properties() 2016 create_token = self._match_set(self.CREATABLES) and self._prev 2017 2018 if not properties or not create_token: 2019 return self._parse_as_command(start) 2020 2021 concurrently = self._match_text_seq("CONCURRENTLY") 2022 exists = self._parse_exists(not_=True) 2023 this = None 2024 expression: t.Optional[exp.Expression] = None 2025 indexes = None 2026 no_schema_binding = None 2027 begin = None 2028 end = None 2029 clone = None 2030 2031 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2032 nonlocal properties 2033 if properties and temp_props: 2034 properties.expressions.extend(temp_props.expressions) 2035 elif temp_props: 2036 properties = temp_props 2037 2038 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2039 this = self._parse_user_defined_function(kind=create_token.token_type) 2040 2041 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2042 extend_props(self._parse_properties()) 2043 2044 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2045 extend_props(self._parse_properties()) 2046 2047 if not expression: 2048 if self._match(TokenType.COMMAND): 2049 expression = self._parse_as_command(self._prev) 2050 else: 2051 begin = self._match(TokenType.BEGIN) 2052 return_ = self._match_text_seq("RETURN") 2053 2054 if self._match(TokenType.STRING, advance=False): 2055 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2056 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2057 expression = self._parse_string() 2058 extend_props(self._parse_properties()) 2059 else: 2060 expression = self._parse_user_defined_function_expression() 2061 2062 end = self._match_text_seq("END") 2063 2064 if return_: 
2065 expression = self.expression(exp.Return, this=expression) 2066 elif create_token.token_type == TokenType.INDEX: 2067 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2068 if not self._match(TokenType.ON): 2069 index = self._parse_id_var() 2070 anonymous = False 2071 else: 2072 index = None 2073 anonymous = True 2074 2075 this = self._parse_index(index=index, anonymous=anonymous) 2076 elif create_token.token_type in self.DB_CREATABLES: 2077 table_parts = self._parse_table_parts( 2078 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2079 ) 2080 2081 # exp.Properties.Location.POST_NAME 2082 self._match(TokenType.COMMA) 2083 extend_props(self._parse_properties(before=True)) 2084 2085 this = self._parse_schema(this=table_parts) 2086 2087 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2088 extend_props(self._parse_properties()) 2089 2090 has_alias = self._match(TokenType.ALIAS) 2091 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2092 # exp.Properties.Location.POST_ALIAS 2093 extend_props(self._parse_properties()) 2094 2095 if create_token.token_type == TokenType.SEQUENCE: 2096 expression = self._parse_types() 2097 props = self._parse_properties() 2098 if props: 2099 sequence_props = exp.SequenceProperties() 2100 options = [] 2101 for prop in props: 2102 if isinstance(prop, exp.SequenceProperties): 2103 for arg, value in prop.args.items(): 2104 if arg == "options": 2105 options.extend(value) 2106 else: 2107 sequence_props.set(arg, value) 2108 prop.pop() 2109 2110 if options: 2111 sequence_props.set("options", options) 2112 2113 props.append("expressions", sequence_props) 2114 extend_props(props) 2115 else: 2116 expression = self._parse_ddl_select() 2117 2118 # Some dialects also support using a table as an alias instead of a SELECT. 2119 # Here we fallback to this as an alternative. 
2120 if not expression and has_alias: 2121 expression = self._try_parse(self._parse_table_parts) 2122 2123 if create_token.token_type == TokenType.TABLE: 2124 # exp.Properties.Location.POST_EXPRESSION 2125 extend_props(self._parse_properties()) 2126 2127 indexes = [] 2128 while True: 2129 index = self._parse_index() 2130 2131 # exp.Properties.Location.POST_INDEX 2132 extend_props(self._parse_properties()) 2133 if not index: 2134 break 2135 else: 2136 self._match(TokenType.COMMA) 2137 indexes.append(index) 2138 elif create_token.token_type == TokenType.VIEW: 2139 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2140 no_schema_binding = True 2141 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2142 extend_props(self._parse_properties()) 2143 2144 shallow = self._match_text_seq("SHALLOW") 2145 2146 if self._match_texts(self.CLONE_KEYWORDS): 2147 copy = self._prev.text.lower() == "copy" 2148 clone = self.expression( 2149 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2150 ) 2151 2152 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2153 return self._parse_as_command(start) 2154 2155 create_kind_text = create_token.text.upper() 2156 return self.expression( 2157 exp.Create, 2158 this=this, 2159 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2160 replace=replace, 2161 refresh=refresh, 2162 unique=unique, 2163 expression=expression, 2164 exists=exists, 2165 properties=properties, 2166 indexes=indexes, 2167 no_schema_binding=no_schema_binding, 2168 begin=begin, 2169 end=end, 2170 clone=clone, 2171 concurrently=concurrently, 2172 clustered=clustered, 2173 ) 2174 2175 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2176 seq = exp.SequenceProperties() 2177 2178 options = [] 2179 index = self._index 2180 2181 while self._curr: 2182 self._match(TokenType.COMMA) 2183 if self._match_text_seq("INCREMENT"): 2184 
self._match_text_seq("BY") 2185 self._match_text_seq("=") 2186 seq.set("increment", self._parse_term()) 2187 elif self._match_text_seq("MINVALUE"): 2188 seq.set("minvalue", self._parse_term()) 2189 elif self._match_text_seq("MAXVALUE"): 2190 seq.set("maxvalue", self._parse_term()) 2191 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2192 self._match_text_seq("=") 2193 seq.set("start", self._parse_term()) 2194 elif self._match_text_seq("CACHE"): 2195 # T-SQL allows empty CACHE which is initialized dynamically 2196 seq.set("cache", self._parse_number() or True) 2197 elif self._match_text_seq("OWNED", "BY"): 2198 # "OWNED BY NONE" is the default 2199 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2200 else: 2201 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2202 if opt: 2203 options.append(opt) 2204 else: 2205 break 2206 2207 seq.set("options", options if options else None) 2208 return None if self._index == index else seq 2209 2210 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2211 # only used for teradata currently 2212 self._match(TokenType.COMMA) 2213 2214 kwargs = { 2215 "no": self._match_text_seq("NO"), 2216 "dual": self._match_text_seq("DUAL"), 2217 "before": self._match_text_seq("BEFORE"), 2218 "default": self._match_text_seq("DEFAULT"), 2219 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2220 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2221 "after": self._match_text_seq("AFTER"), 2222 "minimum": self._match_texts(("MIN", "MINIMUM")), 2223 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2224 } 2225 2226 if self._match_texts(self.PROPERTY_PARSERS): 2227 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2228 try: 2229 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2230 except TypeError: 2231 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2232 2233 return None 2234 2235 def 
_parse_wrapped_properties(self) -> t.List[exp.Expression]: 2236 return self._parse_wrapped_csv(self._parse_property) 2237 2238 def _parse_property(self) -> t.Optional[exp.Expression]: 2239 if self._match_texts(self.PROPERTY_PARSERS): 2240 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2241 2242 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2243 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2244 2245 if self._match_text_seq("COMPOUND", "SORTKEY"): 2246 return self._parse_sortkey(compound=True) 2247 2248 if self._match_text_seq("SQL", "SECURITY"): 2249 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2250 2251 index = self._index 2252 2253 seq_props = self._parse_sequence_properties() 2254 if seq_props: 2255 return seq_props 2256 2257 self._retreat(index) 2258 key = self._parse_column() 2259 2260 if not self._match(TokenType.EQ): 2261 self._retreat(index) 2262 return None 2263 2264 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2265 if isinstance(key, exp.Column): 2266 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2267 2268 value = self._parse_bitwise() or self._parse_var(any_token=True) 2269 2270 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2271 if isinstance(value, exp.Column): 2272 value = exp.var(value.name) 2273 2274 return self.expression(exp.Property, this=key, value=value) 2275 2276 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2277 if self._match_text_seq("BY"): 2278 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2279 2280 self._match(TokenType.ALIAS) 2281 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2282 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2283 2284 return 
self.expression( 2285 exp.FileFormatProperty, 2286 this=( 2287 self.expression( 2288 exp.InputOutputFormat, 2289 input_format=input_format, 2290 output_format=output_format, 2291 ) 2292 if input_format or output_format 2293 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2294 ), 2295 hive_format=True, 2296 ) 2297 2298 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2299 field = self._parse_field() 2300 if isinstance(field, exp.Identifier) and not field.quoted: 2301 field = exp.var(field) 2302 2303 return field 2304 2305 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2306 self._match(TokenType.EQ) 2307 self._match(TokenType.ALIAS) 2308 2309 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2310 2311 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2312 properties = [] 2313 while True: 2314 if before: 2315 prop = self._parse_property_before() 2316 else: 2317 prop = self._parse_property() 2318 if not prop: 2319 break 2320 for p in ensure_list(prop): 2321 properties.append(p) 2322 2323 if properties: 2324 return self.expression(exp.Properties, expressions=properties) 2325 2326 return None 2327 2328 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2329 return self.expression( 2330 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2331 ) 2332 2333 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2334 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2335 security_specifier = self._prev.text.upper() 2336 return self.expression(exp.SecurityProperty, this=security_specifier) 2337 return None 2338 2339 def _parse_settings_property(self) -> exp.SettingsProperty: 2340 return self.expression( 2341 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2342 ) 2343 2344 def _parse_volatile_property(self) -> exp.VolatileProperty | 
exp.StabilityProperty: 2345 if self._index >= 2: 2346 pre_volatile_token = self._tokens[self._index - 2] 2347 else: 2348 pre_volatile_token = None 2349 2350 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2351 return exp.VolatileProperty() 2352 2353 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2354 2355 def _parse_retention_period(self) -> exp.Var: 2356 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2357 number = self._parse_number() 2358 number_str = f"{number} " if number else "" 2359 unit = self._parse_var(any_token=True) 2360 return exp.var(f"{number_str}{unit}") 2361 2362 def _parse_system_versioning_property( 2363 self, with_: bool = False 2364 ) -> exp.WithSystemVersioningProperty: 2365 self._match(TokenType.EQ) 2366 prop = self.expression( 2367 exp.WithSystemVersioningProperty, 2368 **{ # type: ignore 2369 "on": True, 2370 "with": with_, 2371 }, 2372 ) 2373 2374 if self._match_text_seq("OFF"): 2375 prop.set("on", False) 2376 return prop 2377 2378 self._match(TokenType.ON) 2379 if self._match(TokenType.L_PAREN): 2380 while self._curr and not self._match(TokenType.R_PAREN): 2381 if self._match_text_seq("HISTORY_TABLE", "="): 2382 prop.set("this", self._parse_table_parts()) 2383 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2384 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2385 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2386 prop.set("retention_period", self._parse_retention_period()) 2387 2388 self._match(TokenType.COMMA) 2389 2390 return prop 2391 2392 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2393 self._match(TokenType.EQ) 2394 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2395 prop = self.expression(exp.DataDeletionProperty, on=on) 2396 2397 if self._match(TokenType.L_PAREN): 2398 while self._curr and not self._match(TokenType.R_PAREN): 
2399 if self._match_text_seq("FILTER_COLUMN", "="): 2400 prop.set("filter_column", self._parse_column()) 2401 elif self._match_text_seq("RETENTION_PERIOD", "="): 2402 prop.set("retention_period", self._parse_retention_period()) 2403 2404 self._match(TokenType.COMMA) 2405 2406 return prop 2407 2408 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2409 kind = "HASH" 2410 expressions: t.Optional[t.List[exp.Expression]] = None 2411 if self._match_text_seq("BY", "HASH"): 2412 expressions = self._parse_wrapped_csv(self._parse_id_var) 2413 elif self._match_text_seq("BY", "RANDOM"): 2414 kind = "RANDOM" 2415 2416 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2417 buckets: t.Optional[exp.Expression] = None 2418 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2419 buckets = self._parse_number() 2420 2421 return self.expression( 2422 exp.DistributedByProperty, 2423 expressions=expressions, 2424 kind=kind, 2425 buckets=buckets, 2426 order=self._parse_order(), 2427 ) 2428 2429 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2430 self._match_text_seq("KEY") 2431 expressions = self._parse_wrapped_id_vars() 2432 return self.expression(expr_type, expressions=expressions) 2433 2434 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2435 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2436 prop = self._parse_system_versioning_property(with_=True) 2437 self._match_r_paren() 2438 return prop 2439 2440 if self._match(TokenType.L_PAREN, advance=False): 2441 return self._parse_wrapped_properties() 2442 2443 if self._match_text_seq("JOURNAL"): 2444 return self._parse_withjournaltable() 2445 2446 if self._match_texts(self.VIEW_ATTRIBUTES): 2447 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2448 2449 if self._match_text_seq("DATA"): 2450 return self._parse_withdata(no=False) 2451 elif self._match_text_seq("NO", "DATA"): 2452 return 
self._parse_withdata(no=True) 2453 2454 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2455 return self._parse_serde_properties(with_=True) 2456 2457 if self._match(TokenType.SCHEMA): 2458 return self.expression( 2459 exp.WithSchemaBindingProperty, 2460 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2461 ) 2462 2463 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2464 return self.expression( 2465 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2466 ) 2467 2468 if not self._next: 2469 return None 2470 2471 return self._parse_withisolatedloading() 2472 2473 def _parse_procedure_option(self) -> exp.Expression | None: 2474 if self._match_text_seq("EXECUTE", "AS"): 2475 return self.expression( 2476 exp.ExecuteAsProperty, 2477 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2478 or self._parse_string(), 2479 ) 2480 2481 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2482 2483 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2484 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2485 self._match(TokenType.EQ) 2486 2487 user = self._parse_id_var() 2488 self._match(TokenType.PARAMETER) 2489 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2490 2491 if not user or not host: 2492 return None 2493 2494 return exp.DefinerProperty(this=f"{user}@{host}") 2495 2496 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2497 self._match(TokenType.TABLE) 2498 self._match(TokenType.EQ) 2499 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2500 2501 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2502 return self.expression(exp.LogProperty, no=no) 2503 2504 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2505 return self.expression(exp.JournalProperty, **kwargs) 2506 2507 def _parse_checksum(self) -> exp.ChecksumProperty: 2508 
self._match(TokenType.EQ) 2509 2510 on = None 2511 if self._match(TokenType.ON): 2512 on = True 2513 elif self._match_text_seq("OFF"): 2514 on = False 2515 2516 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2517 2518 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2519 return self.expression( 2520 exp.Cluster, 2521 expressions=( 2522 self._parse_wrapped_csv(self._parse_ordered) 2523 if wrapped 2524 else self._parse_csv(self._parse_ordered) 2525 ), 2526 ) 2527 2528 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2529 self._match_text_seq("BY") 2530 2531 self._match_l_paren() 2532 expressions = self._parse_csv(self._parse_column) 2533 self._match_r_paren() 2534 2535 if self._match_text_seq("SORTED", "BY"): 2536 self._match_l_paren() 2537 sorted_by = self._parse_csv(self._parse_ordered) 2538 self._match_r_paren() 2539 else: 2540 sorted_by = None 2541 2542 self._match(TokenType.INTO) 2543 buckets = self._parse_number() 2544 self._match_text_seq("BUCKETS") 2545 2546 return self.expression( 2547 exp.ClusteredByProperty, 2548 expressions=expressions, 2549 sorted_by=sorted_by, 2550 buckets=buckets, 2551 ) 2552 2553 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2554 if not self._match_text_seq("GRANTS"): 2555 self._retreat(self._index - 1) 2556 return None 2557 2558 return self.expression(exp.CopyGrantsProperty) 2559 2560 def _parse_freespace(self) -> exp.FreespaceProperty: 2561 self._match(TokenType.EQ) 2562 return self.expression( 2563 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2564 ) 2565 2566 def _parse_mergeblockratio( 2567 self, no: bool = False, default: bool = False 2568 ) -> exp.MergeBlockRatioProperty: 2569 if self._match(TokenType.EQ): 2570 return self.expression( 2571 exp.MergeBlockRatioProperty, 2572 this=self._parse_number(), 2573 percent=self._match(TokenType.PERCENT), 2574 ) 2575 2576 return 
self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2577 2578 def _parse_datablocksize( 2579 self, 2580 default: t.Optional[bool] = None, 2581 minimum: t.Optional[bool] = None, 2582 maximum: t.Optional[bool] = None, 2583 ) -> exp.DataBlocksizeProperty: 2584 self._match(TokenType.EQ) 2585 size = self._parse_number() 2586 2587 units = None 2588 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2589 units = self._prev.text 2590 2591 return self.expression( 2592 exp.DataBlocksizeProperty, 2593 size=size, 2594 units=units, 2595 default=default, 2596 minimum=minimum, 2597 maximum=maximum, 2598 ) 2599 2600 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2601 self._match(TokenType.EQ) 2602 always = self._match_text_seq("ALWAYS") 2603 manual = self._match_text_seq("MANUAL") 2604 never = self._match_text_seq("NEVER") 2605 default = self._match_text_seq("DEFAULT") 2606 2607 autotemp = None 2608 if self._match_text_seq("AUTOTEMP"): 2609 autotemp = self._parse_schema() 2610 2611 return self.expression( 2612 exp.BlockCompressionProperty, 2613 always=always, 2614 manual=manual, 2615 never=never, 2616 default=default, 2617 autotemp=autotemp, 2618 ) 2619 2620 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2621 index = self._index 2622 no = self._match_text_seq("NO") 2623 concurrent = self._match_text_seq("CONCURRENT") 2624 2625 if not self._match_text_seq("ISOLATED", "LOADING"): 2626 self._retreat(index) 2627 return None 2628 2629 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2630 return self.expression( 2631 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2632 ) 2633 2634 def _parse_locking(self) -> exp.LockingProperty: 2635 if self._match(TokenType.TABLE): 2636 kind = "TABLE" 2637 elif self._match(TokenType.VIEW): 2638 kind = "VIEW" 2639 elif self._match(TokenType.ROW): 2640 kind = "ROW" 2641 elif self._match_text_seq("DATABASE"): 2642 
kind = "DATABASE" 2643 else: 2644 kind = None 2645 2646 if kind in ("DATABASE", "TABLE", "VIEW"): 2647 this = self._parse_table_parts() 2648 else: 2649 this = None 2650 2651 if self._match(TokenType.FOR): 2652 for_or_in = "FOR" 2653 elif self._match(TokenType.IN): 2654 for_or_in = "IN" 2655 else: 2656 for_or_in = None 2657 2658 if self._match_text_seq("ACCESS"): 2659 lock_type = "ACCESS" 2660 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2661 lock_type = "EXCLUSIVE" 2662 elif self._match_text_seq("SHARE"): 2663 lock_type = "SHARE" 2664 elif self._match_text_seq("READ"): 2665 lock_type = "READ" 2666 elif self._match_text_seq("WRITE"): 2667 lock_type = "WRITE" 2668 elif self._match_text_seq("CHECKSUM"): 2669 lock_type = "CHECKSUM" 2670 else: 2671 lock_type = None 2672 2673 override = self._match_text_seq("OVERRIDE") 2674 2675 return self.expression( 2676 exp.LockingProperty, 2677 this=this, 2678 kind=kind, 2679 for_or_in=for_or_in, 2680 lock_type=lock_type, 2681 override=override, 2682 ) 2683 2684 def _parse_partition_by(self) -> t.List[exp.Expression]: 2685 if self._match(TokenType.PARTITION_BY): 2686 return self._parse_csv(self._parse_assignment) 2687 return [] 2688 2689 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2690 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2691 if self._match_text_seq("MINVALUE"): 2692 return exp.var("MINVALUE") 2693 if self._match_text_seq("MAXVALUE"): 2694 return exp.var("MAXVALUE") 2695 return self._parse_bitwise() 2696 2697 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2698 expression = None 2699 from_expressions = None 2700 to_expressions = None 2701 2702 if self._match(TokenType.IN): 2703 this = self._parse_wrapped_csv(self._parse_bitwise) 2704 elif self._match(TokenType.FROM): 2705 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2706 self._match_text_seq("TO") 2707 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2708 elif 
self._match_text_seq("WITH", "(", "MODULUS"): 2709 this = self._parse_number() 2710 self._match_text_seq(",", "REMAINDER") 2711 expression = self._parse_number() 2712 self._match_r_paren() 2713 else: 2714 self.raise_error("Failed to parse partition bound spec.") 2715 2716 return self.expression( 2717 exp.PartitionBoundSpec, 2718 this=this, 2719 expression=expression, 2720 from_expressions=from_expressions, 2721 to_expressions=to_expressions, 2722 ) 2723 2724 # https://www.postgresql.org/docs/current/sql-createtable.html 2725 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2726 if not self._match_text_seq("OF"): 2727 self._retreat(self._index - 1) 2728 return None 2729 2730 this = self._parse_table(schema=True) 2731 2732 if self._match(TokenType.DEFAULT): 2733 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2734 elif self._match_text_seq("FOR", "VALUES"): 2735 expression = self._parse_partition_bound_spec() 2736 else: 2737 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2738 2739 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2740 2741 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2742 self._match(TokenType.EQ) 2743 return self.expression( 2744 exp.PartitionedByProperty, 2745 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2746 ) 2747 2748 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2749 if self._match_text_seq("AND", "STATISTICS"): 2750 statistics = True 2751 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2752 statistics = False 2753 else: 2754 statistics = None 2755 2756 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2757 2758 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2759 if self._match_text_seq("SQL"): 2760 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2761 return None 2762 2763 def 
_parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2764 if self._match_text_seq("SQL", "DATA"): 2765 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2766 return None 2767 2768 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("PRIMARY", "INDEX"): 2770 return exp.NoPrimaryIndexProperty() 2771 if self._match_text_seq("SQL"): 2772 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2773 return None 2774 2775 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2776 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2777 return exp.OnCommitProperty() 2778 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2779 return exp.OnCommitProperty(delete=True) 2780 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2781 2782 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2783 if self._match_text_seq("SQL", "DATA"): 2784 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2785 return None 2786 2787 def _parse_distkey(self) -> exp.DistKeyProperty: 2788 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2789 2790 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2791 table = self._parse_table(schema=True) 2792 2793 options = [] 2794 while self._match_texts(("INCLUDING", "EXCLUDING")): 2795 this = self._prev.text.upper() 2796 2797 id_var = self._parse_id_var() 2798 if not id_var: 2799 return None 2800 2801 options.append( 2802 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2803 ) 2804 2805 return self.expression(exp.LikeProperty, this=table, expressions=options) 2806 2807 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2808 return self.expression( 2809 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2810 ) 2811 2812 def _parse_character_set(self, default: bool = 
False) -> exp.CharacterSetProperty: 2813 self._match(TokenType.EQ) 2814 return self.expression( 2815 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2816 ) 2817 2818 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2819 self._match_text_seq("WITH", "CONNECTION") 2820 return self.expression( 2821 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2822 ) 2823 2824 def _parse_returns(self) -> exp.ReturnsProperty: 2825 value: t.Optional[exp.Expression] 2826 null = None 2827 is_table = self._match(TokenType.TABLE) 2828 2829 if is_table: 2830 if self._match(TokenType.LT): 2831 value = self.expression( 2832 exp.Schema, 2833 this="TABLE", 2834 expressions=self._parse_csv(self._parse_struct_types), 2835 ) 2836 if not self._match(TokenType.GT): 2837 self.raise_error("Expecting >") 2838 else: 2839 value = self._parse_schema(exp.var("TABLE")) 2840 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2841 null = True 2842 value = None 2843 else: 2844 value = self._parse_types() 2845 2846 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2847 2848 def _parse_describe(self) -> exp.Describe: 2849 kind = self._match_set(self.CREATABLES) and self._prev.text 2850 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2851 if self._match(TokenType.DOT): 2852 style = None 2853 self._retreat(self._index - 2) 2854 2855 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2856 2857 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2858 this = self._parse_statement() 2859 else: 2860 this = self._parse_table(schema=True) 2861 2862 properties = self._parse_properties() 2863 expressions = properties.expressions if properties else None 2864 partition = self._parse_partition() 2865 return self.expression( 2866 exp.Describe, 2867 this=this, 2868 style=style, 2869 kind=kind, 2870 expressions=expressions, 2871 
partition=partition, 2872 format=format, 2873 ) 2874 2875 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2876 kind = self._prev.text.upper() 2877 expressions = [] 2878 2879 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2880 if self._match(TokenType.WHEN): 2881 expression = self._parse_disjunction() 2882 self._match(TokenType.THEN) 2883 else: 2884 expression = None 2885 2886 else_ = self._match(TokenType.ELSE) 2887 2888 if not self._match(TokenType.INTO): 2889 return None 2890 2891 return self.expression( 2892 exp.ConditionalInsert, 2893 this=self.expression( 2894 exp.Insert, 2895 this=self._parse_table(schema=True), 2896 expression=self._parse_derived_table_values(), 2897 ), 2898 expression=expression, 2899 else_=else_, 2900 ) 2901 2902 expression = parse_conditional_insert() 2903 while expression is not None: 2904 expressions.append(expression) 2905 expression = parse_conditional_insert() 2906 2907 return self.expression( 2908 exp.MultitableInserts, 2909 kind=kind, 2910 comments=comments, 2911 expressions=expressions, 2912 source=self._parse_table(), 2913 ) 2914 2915 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2916 comments = [] 2917 hint = self._parse_hint() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 ignore = self._match(TokenType.IGNORE) 2920 local = self._match_text_seq("LOCAL") 2921 alternative = None 2922 is_function = None 2923 2924 if self._match_text_seq("DIRECTORY"): 2925 this: t.Optional[exp.Expression] = self.expression( 2926 exp.Directory, 2927 this=self._parse_var_or_string(), 2928 local=local, 2929 row_format=self._parse_row_format(match_row=True), 2930 ) 2931 else: 2932 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2933 comments += ensure_list(self._prev_comments) 2934 return self._parse_multitable_inserts(comments) 2935 2936 if self._match(TokenType.OR): 2937 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and 
self._prev.text

        self._match(TokenType.INTO)
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = (
            self._parse_table(schema=True, parse_partition=True)
            if not is_function
            else self._parse_function()
        )
        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        # NOTE: many of the keyword arguments below consume tokens as they are
        # evaluated, so their order mirrors the SQL grammar and must not change
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement, optionally qualified with CONNECTION or QUERY."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... conflict clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON",
"CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                # Explicit conflict target: ON CONFLICT (col1, col2, ...)
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, optionally with an INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse FORMAT ... (the ROW keyword was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...), retreating if the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            # WITH may have been consumed above, so rewind to the start
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT SERDE '...' or ROW FORMAT DELIMITED ... property."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties =
self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional; only matched ones are recorded
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ...; other LOAD forms become a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning =
self._parse_returning()

        # Keyword arguments consume tokens in order, mirroring the SQL grammar
        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <target>."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single 'key' = 'value' pair inside OPTIONS(...)
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
exp.Cache, 3170 this=table, 3171 lazy=lazy, 3172 options=options, 3173 expression=self._parse_select(nested=True), 3174 ) 3175 3176 def _parse_partition(self) -> t.Optional[exp.Partition]: 3177 if not self._match_texts(self.PARTITION_KEYWORDS): 3178 return None 3179 3180 return self.expression( 3181 exp.Partition, 3182 subpartition=self._prev.text.upper() == "SUBPARTITION", 3183 expressions=self._parse_wrapped_csv(self._parse_assignment), 3184 ) 3185 3186 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3187 def _parse_value_expression() -> t.Optional[exp.Expression]: 3188 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3189 return exp.var(self._prev.text.upper()) 3190 return self._parse_expression() 3191 3192 if self._match(TokenType.L_PAREN): 3193 expressions = self._parse_csv(_parse_value_expression) 3194 self._match_r_paren() 3195 return self.expression(exp.Tuple, expressions=expressions) 3196 3197 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3198 expression = self._parse_expression() 3199 if expression: 3200 return self.expression(exp.Tuple, expressions=[expression]) 3201 return None 3202 3203 def _parse_projections(self) -> t.List[exp.Expression]: 3204 return self._parse_expressions() 3205 3206 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3207 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3208 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3209 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3210 ) 3211 elif self._match(TokenType.FROM): 3212 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3213 # Support parentheses for duckdb FROM-first syntax 3214 select = self._parse_select(from_=from_) 3215 if select: 3216 if not select.args.get("from"): 3217 select.set("from", from_) 3218 this = select 3219 else: 3220 this = exp.select("*").from_(t.cast(exp.From, from_)) 3221 else: 3222 this = ( 3223 self._parse_table(consume_pipe=True) 3224 if table 3225 else self._parse_select(nested=True, parse_set_operation=False) 3226 ) 3227 3228 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3229 # in case a modifier (e.g. 
join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, optionally consuming trailing |> pipe syntax."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            # Pipe syntax may start from a bare FROM clause with no SELECT yet
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse the body of a query: CTEs, projections, modifiers, set operations."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type ==
TokenType.DOT:
                # A following DOT means the next word is a dotted name
                # (e.g. all.x), not an ALL/DISTINCT quantifier
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            # A leading FROM (duckdb FROM-first) wins over a trailing one
            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or
self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM wasn't followed by a function call: give the token back
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH <kind> FIRST BY ... [SET ...] [USING ...] for recursive CTEs."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            # Stop unless another CTE follows (separated by a comma or WITH)
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: <alias> AS (<statement>)."""
        index = self._index

        alias
= self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            # No AS keyword where the dialect requires one: not a CTE after all
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # Wrap a bare VALUES CTE in SELECT * so downstream logic sees a query
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse a table alias, optionally with a parenthesized column list."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the
TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, parsing trailing pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources into explicit UNNESTs."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Track the (normalized) names that earlier FROM/JOIN items introduce
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE, LIMIT, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals",
lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # Normalize LIMIT's embedded offset into its own node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)

                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and keep the raw SQL as one hint string."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Parse one function-style hint (thin wrapper around _parse_function_call)."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a comma-separated hint list, falling back to raw text on failure."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            # Either parsing failed or tokens are left over: keep the raw text
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse a hint that the tokenizer captured as a comment on a HINT token."""
        if
self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` means FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry, optionally prefixed with FINAL or RUNNING."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH variants are kept as literal text in a var node
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT
EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP variants are likewise preserved as literal text
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan to the matching close paren, keeping the raw pattern text
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL, CROSS APPLY, or OUTER APPLY constructs."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply=False encodes OUTER APPLY; None means plain LATERAL
            cross_apply = False

        if cross_apply is not None:
            this =
self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery followed: parse an unnest, function call, or dotted name
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a JOIN; any may be None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            # Unwrap a parsed Column back to its underlying identifier
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return
self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause, including comma cross-joins and APPLY forms."""
        if self._match(TokenType.COMMA):
            # A bare comma between tables acts as a cross join
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            # The matched method/side/kind weren't followed by JOIN: rewind
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
and not (outer_apply or cross_apply) 3866 and not isinstance(kwargs["this"], exp.Unnest) 3867 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3868 ): 3869 index = self._index 3870 joins: t.Optional[list] = list(self._parse_joins()) 3871 3872 if joins and self._match(TokenType.ON): 3873 kwargs["on"] = self._parse_assignment() 3874 elif joins and self._match(TokenType.USING): 3875 kwargs["using"] = self._parse_using_identifiers() 3876 else: 3877 joins = None 3878 self._retreat(index) 3879 3880 kwargs["this"].set("joins", joins if joins else None) 3881 3882 kwargs["pivots"] = self._parse_pivots() 3883 3884 comments = [c for token in (method, side, kind) if token for c in token.comments] 3885 comments = (join_comments or []) + comments 3886 3887 if ( 3888 self.ADD_JOIN_ON_TRUE 3889 and not kwargs.get("on") 3890 and not kwargs.get("using") 3891 and not kwargs.get("method") 3892 and kwargs.get("kind") in (None, "INNER", "OUTER") 3893 ): 3894 kwargs["on"] = exp.true() 3895 3896 return self.expression(exp.Join, comments=comments, **kwargs) 3897 3898 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3899 this = self._parse_assignment() 3900 3901 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3902 return this 3903 3904 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3905 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3906 3907 return this 3908 3909 def _parse_index_params(self) -> exp.IndexParameters: 3910 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3911 3912 if self._match(TokenType.L_PAREN, advance=False): 3913 columns = self._parse_wrapped_csv(self._parse_with_operator) 3914 else: 3915 columns = None 3916 3917 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3918 partition_by = self._parse_partition_by() 3919 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3920 
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If `index` was already parsed by the caller (or `anonymous` is set),
        only the target table is parsed here; otherwise the optional
        UNIQUE/PRIMARY/AMP modifiers, the INDEX keyword and the index name
        are consumed.

        Returns:
            An exp.Index node, or None when the INDEX keyword is absent.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # NOTE(review): dialect-specific modifier — confirm usage

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
exp.IndexTableHint(this=self._prev.text.upper()) 3992 3993 self._match_set((TokenType.INDEX, TokenType.KEY)) 3994 if self._match(TokenType.FOR): 3995 hint.set("target", self._advance_any() and self._prev.text.upper()) 3996 3997 hint.set("expressions", self._parse_wrapped_id_vars()) 3998 hints.append(hint) 3999 4000 return hints or None 4001 4002 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4003 return ( 4004 (not schema and self._parse_function(optional_parens=False)) 4005 or self._parse_id_var(any_token=False) 4006 or self._parse_string_as_identifier() 4007 or self._parse_placeholder() 4008 ) 4009 4010 def _parse_table_parts( 4011 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4012 ) -> exp.Table: 4013 catalog = None 4014 db = None 4015 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4016 4017 while self._match(TokenType.DOT): 4018 if catalog: 4019 # This allows nesting the table in arbitrarily many dot expressions if needed 4020 table = self.expression( 4021 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4022 ) 4023 else: 4024 catalog = db 4025 db = table 4026 # "" used for tsql FROM a..b case 4027 table = self._parse_table_part(schema=schema) or "" 4028 4029 if ( 4030 wildcard 4031 and self._is_connected() 4032 and (isinstance(table, exp.Identifier) or not table) 4033 and self._match(TokenType.STAR) 4034 ): 4035 if isinstance(table, exp.Identifier): 4036 table.args["this"] += "*" 4037 else: 4038 table = exp.Identifier(this="*") 4039 4040 # We bubble up comments from the Identifier to the Table 4041 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4042 4043 if is_db_reference: 4044 catalog = db 4045 db = table 4046 table = None 4047 4048 if not table and not is_db_reference: 4049 self.raise_error(f"Expected table name but got {self._curr}") 4050 if not db and is_db_reference: 4051 self.raise_error(f"Expected 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: LATERAL, UNNEST, VALUES, a subquery, or a
        plain table reference, together with its version, sample, alias,
        hints, pivots and (optionally) trailing joins.
        """
        # The alternatives below are tried in order; the first one that
        # successfully parses wins.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the sample clause before the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse a time-travel clause such as `AT(TIMESTAMP => <expr>)`.

        Returns None — restoring the token position — when the clause does not
        fully materialize (no `=>` expression follows).
        """
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Incomplete clause: rewind so the caller can re-interpret
                # the consumed tokens.
                self._retreat(index)

        return historical_data
self.raise_error("Unexpected extra column alias in unnest.") 4251 4252 alias.set("columns", [alias.this]) 4253 alias.set("this", None) 4254 4255 columns = alias.args.get("columns") or [] 4256 if offset and len(expressions) < len(columns): 4257 offset = columns.pop() 4258 4259 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4260 self._match(TokenType.ALIAS) 4261 offset = self._parse_id_var( 4262 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4263 ) or exp.to_identifier("offset") 4264 4265 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4266 4267 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4268 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4269 if not is_derived and not ( 4270 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4271 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4272 ): 4273 return None 4274 4275 expressions = self._parse_csv(self._parse_value) 4276 alias = self._parse_table_alias() 4277 4278 if is_derived: 4279 self._match_r_paren() 4280 4281 return self.expression( 4282 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4283 ) 4284 4285 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4286 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4287 as_modifier and self._match_text_seq("USING", "SAMPLE") 4288 ): 4289 return None 4290 4291 bucket_numerator = None 4292 bucket_denominator = None 4293 bucket_field = None 4294 percent = None 4295 size = None 4296 seed = None 4297 4298 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4299 matched_l_paren = self._match(TokenType.L_PAREN) 4300 4301 if self.TABLESAMPLE_CSV: 4302 num = None 4303 expressions = self._parse_csv(self._parse_primary) 4304 else: 4305 expressions = None 4306 num = ( 4307 self._parse_factor() 4308 if self._match(TokenType.NUMBER, advance=False) 4309 else 
self._parse_primary() or self._parse_placeholder() 4310 ) 4311 4312 if self._match_text_seq("BUCKET"): 4313 bucket_numerator = self._parse_number() 4314 self._match_text_seq("OUT", "OF") 4315 bucket_denominator = bucket_denominator = self._parse_number() 4316 self._match(TokenType.ON) 4317 bucket_field = self._parse_field() 4318 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4319 percent = num 4320 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4321 size = num 4322 else: 4323 percent = num 4324 4325 if matched_l_paren: 4326 self._match_r_paren() 4327 4328 if self._match(TokenType.L_PAREN): 4329 method = self._parse_var(upper=True) 4330 seed = self._match(TokenType.COMMA) and self._parse_number() 4331 self._match_r_paren() 4332 elif self._match_texts(("SEED", "REPEATABLE")): 4333 seed = self._parse_wrapped(self._parse_number) 4334 4335 if not method and self.DEFAULT_SAMPLING_METHOD: 4336 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4337 4338 return self.expression( 4339 exp.TableSample, 4340 expressions=expressions, 4341 method=method, 4342 bucket_numerator=bucket_numerator, 4343 bucket_denominator=bucket_denominator, 4344 bucket_field=bucket_field, 4345 percent=percent, 4346 size=size, 4347 seed=seed, 4348 ) 4349 4350 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4351 return list(iter(self._parse_pivot, None)) or None 4352 4353 def _parse_joins(self) -> t.Iterator[exp.Join]: 4354 return iter(self._parse_join, None) 4355 4356 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4357 if not self._match(TokenType.INTO): 4358 return None 4359 4360 return self.expression( 4361 exp.UnpivotColumns, 4362 this=self._match_text_seq("NAME") and self._parse_column(), 4363 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4364 ) 4365 4366 # https://duckdb.org/docs/sql/statements/pivot 4367 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> 
    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<column> IN (<values>)` portion of a PIVOT clause.

        Raises a parse error if `IN (` does not follow the pivot column.
        """

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            # A pivot value may carry an alias: FOR col IN (1 AS one, 2 AS two)
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    # A bare column used as an alias reduces to its identifier.
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            # IN (ANY [ORDER BY ...]) — the value list is dynamic.
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, inferring output columns for PIVOT.

        Returns None — restoring the token position — when the tokens do not
        form a complete PIVOT/UNPIVOT clause.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists aggregation calls.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        # Multiple FOR ... IN (...) groups may follow back to back.
        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
= self._prev_comments 4557 4558 elements: t.Dict[str, t.Any] = defaultdict(list) 4559 4560 if self._match(TokenType.ALL): 4561 elements["all"] = True 4562 elif self._match(TokenType.DISTINCT): 4563 elements["all"] = False 4564 4565 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4566 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4567 4568 while True: 4569 index = self._index 4570 4571 elements["expressions"].extend( 4572 self._parse_csv( 4573 lambda: None 4574 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4575 else self._parse_assignment() 4576 ) 4577 ) 4578 4579 before_with_index = self._index 4580 with_prefix = self._match(TokenType.WITH) 4581 4582 if self._match(TokenType.ROLLUP): 4583 elements["rollup"].append( 4584 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4585 ) 4586 elif self._match(TokenType.CUBE): 4587 elements["cube"].append( 4588 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4589 ) 4590 elif self._match(TokenType.GROUPING_SETS): 4591 elements["grouping_sets"].append( 4592 self.expression( 4593 exp.GroupingSets, 4594 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4595 ) 4596 ) 4597 elif self._match_text_seq("TOTALS"): 4598 elements["totals"] = True # type: ignore 4599 4600 if before_with_index <= self._index <= before_with_index + 1: 4601 self._retreat(before_with_index) 4602 break 4603 4604 if index == self._index: 4605 break 4606 4607 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4608 4609 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4610 return self.expression( 4611 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4612 ) 4613 4614 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4615 if self._match(TokenType.L_PAREN): 4616 grouping_set = self._parse_csv(self._parse_column) 4617 self._match_r_paren() 4618 return 
self.expression(exp.Tuple, expressions=grouping_set) 4619 4620 return self._parse_column() 4621 4622 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4623 if not skip_having_token and not self._match(TokenType.HAVING): 4624 return None 4625 return self.expression( 4626 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4627 ) 4628 4629 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4630 if not self._match(TokenType.QUALIFY): 4631 return None 4632 return self.expression(exp.Qualify, this=self._parse_assignment()) 4633 4634 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4635 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4636 exp.Prior, this=self._parse_bitwise() 4637 ) 4638 connect = self._parse_assignment() 4639 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4640 return connect 4641 4642 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4643 if skip_start_token: 4644 start = None 4645 elif self._match(TokenType.START_WITH): 4646 start = self._parse_assignment() 4647 else: 4648 return None 4649 4650 self._match(TokenType.CONNECT_BY) 4651 nocycle = self._match_text_seq("NOCYCLE") 4652 connect = self._parse_connect_with_prior() 4653 4654 if not start and self._match(TokenType.START_WITH): 4655 start = self._parse_assignment() 4656 4657 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4658 4659 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4660 this = self._parse_id_var(any_token=True) 4661 if self._match(TokenType.ALIAS): 4662 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4663 return this 4664 4665 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4666 if self._match_text_seq("INTERPOLATE"): 4667 return self._parse_wrapped_csv(self._parse_name_as_expression) 4668 return None 4669 4670 def _parse_order( 4671 self, this: 
t.Optional[exp.Expression] = None, skip_order_token: bool = False 4672 ) -> t.Optional[exp.Expression]: 4673 siblings = None 4674 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4675 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4676 return this 4677 4678 siblings = True 4679 4680 return self.expression( 4681 exp.Order, 4682 comments=self._prev_comments, 4683 this=this, 4684 expressions=self._parse_csv(self._parse_ordered), 4685 siblings=siblings, 4686 ) 4687 4688 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4689 if not self._match(token): 4690 return None 4691 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4692 4693 def _parse_ordered( 4694 self, parse_method: t.Optional[t.Callable] = None 4695 ) -> t.Optional[exp.Ordered]: 4696 this = parse_method() if parse_method else self._parse_assignment() 4697 if not this: 4698 return None 4699 4700 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4701 this = exp.var("ALL") 4702 4703 asc = self._match(TokenType.ASC) 4704 desc = self._match(TokenType.DESC) or (asc and False) 4705 4706 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4707 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4708 4709 nulls_first = is_nulls_first or False 4710 explicitly_null_ordered = is_nulls_first or is_nulls_last 4711 4712 if ( 4713 not explicitly_null_ordered 4714 and ( 4715 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4716 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4717 ) 4718 and self.dialect.NULL_ORDERING != "nulls_are_last" 4719 ): 4720 nulls_first = True 4721 4722 if self._match_text_seq("WITH", "FILL"): 4723 with_fill = self.expression( 4724 exp.WithFill, 4725 **{ # type: ignore 4726 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4727 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4728 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 
4729 "interpolate": self._parse_interpolate(), 4730 }, 4731 ) 4732 else: 4733 with_fill = None 4734 4735 return self.expression( 4736 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4737 ) 4738 4739 def _parse_limit_options(self) -> exp.LimitOptions: 4740 percent = self._match(TokenType.PERCENT) 4741 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4742 self._match_text_seq("ONLY") 4743 with_ties = self._match_text_seq("WITH", "TIES") 4744 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4745 4746 def _parse_limit( 4747 self, 4748 this: t.Optional[exp.Expression] = None, 4749 top: bool = False, 4750 skip_limit_token: bool = False, 4751 ) -> t.Optional[exp.Expression]: 4752 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4753 comments = self._prev_comments 4754 if top: 4755 limit_paren = self._match(TokenType.L_PAREN) 4756 expression = self._parse_term() if limit_paren else self._parse_number() 4757 4758 if limit_paren: 4759 self._match_r_paren() 4760 4761 limit_options = self._parse_limit_options() 4762 else: 4763 limit_options = None 4764 expression = self._parse_term() 4765 4766 if self._match(TokenType.COMMA): 4767 offset = expression 4768 expression = self._parse_term() 4769 else: 4770 offset = None 4771 4772 limit_exp = self.expression( 4773 exp.Limit, 4774 this=this, 4775 expression=expression, 4776 offset=offset, 4777 comments=comments, 4778 limit_options=limit_options, 4779 expressions=self._parse_limit_by(), 4780 ) 4781 4782 return limit_exp 4783 4784 if self._match(TokenType.FETCH): 4785 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4786 direction = self._prev.text.upper() if direction else "FIRST" 4787 4788 count = self._parse_field(tokens=self.FETCH_TOKENS) 4789 4790 return self.expression( 4791 exp.Fetch, 4792 direction=direction, 4793 count=count, 4794 limit_options=self._parse_limit_options(), 4795 ) 4796 4797 return this 
    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses (FOR UPDATE, FOR SHARE, ...).

        Multiple locking clauses may appear back to back; parsing stops at
        the first token sequence that is not a locking clause.
        """
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                # e.g. FOR UPDATE OF <tables> restricts the lock to the listed tables
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one set operation (UNION/EXCEPT/INTERSECT) joining `this`
        with the query that follows the operator.

        Returns None — restoring the token position — when no set operator
        follows.
        """
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # Neither keyword given: fall back to the dialect default, which
            # may be None (in which case the operator requires an explicit one).
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
4930 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4931 expression = this.expression 4932 4933 if expression: 4934 for arg in self.SET_OP_MODIFIERS: 4935 expr = expression.args.get(arg) 4936 if expr: 4937 this.set(arg, expr.pop()) 4938 4939 return this 4940 4941 def _parse_expression(self) -> t.Optional[exp.Expression]: 4942 return self._parse_alias(self._parse_assignment()) 4943 4944 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4945 this = self._parse_disjunction() 4946 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4947 # This allows us to parse <non-identifier token> := <expr> 4948 this = exp.column( 4949 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4950 ) 4951 4952 while self._match_set(self.ASSIGNMENT): 4953 if isinstance(this, exp.Column) and len(this.parts) == 1: 4954 this = this.this 4955 4956 this = self.expression( 4957 self.ASSIGNMENT[self._prev.token_type], 4958 this=this, 4959 comments=self._prev_comments, 4960 expression=self._parse_assignment(), 4961 ) 4962 4963 return this 4964 4965 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4966 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4967 4968 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4969 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4970 4971 def _parse_equality(self) -> t.Optional[exp.Expression]: 4972 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4973 4974 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4975 return self._parse_tokens(self._parse_range, self.COMPARISON) 4976 4977 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4978 this = this or self._parse_bitwise() 4979 negate = self._match(TokenType.NOT) 4980 4981 if self._match_set(self.RANGE_PARSERS): 4982 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4983 if not expression: 4984 
return this 4985 4986 this = expression 4987 elif self._match(TokenType.ISNULL): 4988 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4989 4990 # Postgres supports ISNULL and NOTNULL for conditions. 4991 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4992 if self._match(TokenType.NOTNULL): 4993 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4994 this = self.expression(exp.Not, this=this) 4995 4996 if negate: 4997 this = self._negate_range(this) 4998 4999 if self._match(TokenType.IS): 5000 this = self._parse_is(this) 5001 5002 return this 5003 5004 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5005 if not this: 5006 return this 5007 5008 return self.expression(exp.Not, this=this) 5009 5010 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5011 index = self._index - 1 5012 negate = self._match(TokenType.NOT) 5013 5014 if self._match_text_seq("DISTINCT", "FROM"): 5015 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5016 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5017 5018 if self._match(TokenType.JSON): 5019 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5020 5021 if self._match_text_seq("WITH"): 5022 _with = True 5023 elif self._match_text_seq("WITHOUT"): 5024 _with = False 5025 else: 5026 _with = None 5027 5028 unique = self._match(TokenType.UNIQUE) 5029 self._match_text_seq("KEYS") 5030 expression: t.Optional[exp.Expression] = self.expression( 5031 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5032 ) 5033 else: 5034 expression = self._parse_primary() or self._parse_null() 5035 if not expression: 5036 self._retreat(index) 5037 return None 5038 5039 this = self.expression(exp.Is, this=this, expression=expression) 5040 return self.expression(exp.Not, this=this) if negate else this 5041 5042 def _parse_in(self, this: t.Optional[exp.Expression], alias: 
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `[SYMMETRIC | ASYMMETRIC] <low> AND <high>` following a BETWEEN token."""
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional `ESCAPE '<string>'` clause to a LIKE-style expression."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to the INTERVAL '<value>' <unit> form.

        When `match_interval` is False the INTERVAL keyword is not required; this is used
        to parse the continuation of interval sums (see the exp.Add return below).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # A bare column named IS right after INTERVAL means this wasn't an interval; rewind
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
[+] 'val_n' unit_n into a sum of intervals 5138 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5139 return self.expression( 5140 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5141 ) 5142 5143 self._retreat(index) 5144 return interval 5145 5146 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5147 this = self._parse_term() 5148 5149 while True: 5150 if self._match_set(self.BITWISE): 5151 this = self.expression( 5152 self.BITWISE[self._prev.token_type], 5153 this=this, 5154 expression=self._parse_term(), 5155 ) 5156 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5157 this = self.expression( 5158 exp.DPipe, 5159 this=this, 5160 expression=self._parse_term(), 5161 safe=not self.dialect.STRICT_STRING_CONCAT, 5162 ) 5163 elif self._match(TokenType.DQMARK): 5164 this = self.expression( 5165 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5166 ) 5167 elif self._match_pair(TokenType.LT, TokenType.LT): 5168 this = self.expression( 5169 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5170 ) 5171 elif self._match_pair(TokenType.GT, TokenType.GT): 5172 this = self.expression( 5173 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5174 ) 5175 else: 5176 break 5177 5178 return this 5179 5180 def _parse_term(self) -> t.Optional[exp.Expression]: 5181 this = self._parse_factor() 5182 5183 while self._match_set(self.TERM): 5184 klass = self.TERM[self._prev.token_type] 5185 comments = self._prev_comments 5186 expression = self._parse_factor() 5187 5188 this = self.expression(klass, this=this, comments=comments, expression=expression) 5189 5190 if isinstance(this, exp.Collate): 5191 expr = this.expression 5192 5193 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5194 # fallback to Identifier / Var 5195 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5196 ident = expr.this 5197 if isinstance(ident, 
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR), tagging division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word operator with no right operand was actually an identifier; rewind
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record the dialect's division semantics to drive transpilation
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an expression that may begin with a data type (e.g. DATE '2020-01-01').

        Falls back to parsing a column (or a bare identifier when
        `fallback_to_identifier` is set) if what follows is not a type constructor.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    # A TIMESTAMP literal that carries a time zone becomes TIMESTAMPTZ
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data-type parameter, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare column here is really a keyword-like parameter; normalize to a Var
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a user-defined type from a (possibly dotted) identifier chain."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested, parameterized, and dialect-specific forms.

        Rewinds and returns None when the upcoming tokens don't form a type.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: it may spell a known type name
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # NULLABLE(T) unwraps to T with the nullable flag set
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            # A parenthesized type followed by a string is a function-like constructor,
            # not a type reference; rewind completely in the negative case
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
self._match(TokenType.R_BRACKET) 5523 ): 5524 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5525 # not to be confused with the fixed size array parsing 5526 break 5527 5528 matched_array = False 5529 values = self._parse_csv(self._parse_assignment) or None 5530 if ( 5531 values 5532 and not schema 5533 and ( 5534 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5535 ) 5536 ): 5537 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5538 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5539 self._retreat(index) 5540 break 5541 5542 this = exp.DataType( 5543 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5544 ) 5545 self._match(TokenType.R_BRACKET) 5546 5547 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5548 converter = self.TYPE_CONVERTERS.get(this.this) 5549 if converter: 5550 this = converter(t.cast(exp.DataType, this)) 5551 5552 return this 5553 5554 def _parse_vector_expressions( 5555 self, expressions: t.List[exp.Expression] 5556 ) -> t.List[exp.Expression]: 5557 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5558 5559 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5560 index = self._index 5561 5562 if ( 5563 self._curr 5564 and self._next 5565 and self._curr.token_type in self.TYPE_TOKENS 5566 and self._next.token_type in self.TYPE_TOKENS 5567 ): 5568 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5569 # type token. 
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional `AT TIME ZONE <expr>` suffix to `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Capture a trailing join-marker token as a boolean flag on the column
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, wrapping identifiers in exp.Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by parens can be a plain identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:path` VARIANT extraction into JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

        # Re-apply the casts that were peeled off the path, innermost first
        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast, i.e. a type."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators (self.COLUMN_OPERATORS), e.g. dots, casts, brackets."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column's parts left to make room for the new trailing part
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
op_token = self._prev.token_type 5687 op = self.COLUMN_OPERATORS.get(op_token) 5688 5689 if op_token in self.CAST_COLUMN_OPERATORS: 5690 field = self._parse_dcolon() 5691 if not field: 5692 self.raise_error("Expected type") 5693 elif op and self._curr: 5694 field = self._parse_column_reference() or self._parse_bracket() 5695 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5696 field = self._parse_column_ops(field) 5697 else: 5698 field = self._parse_field(any_token=True, anonymous_func=True) 5699 5700 # Function calls can be qualified, e.g., x.y.FOO() 5701 # This converts the final AST to a series of Dots leading to the function call 5702 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5703 if isinstance(field, (exp.Func, exp.Window)) and this: 5704 this = this.transform( 5705 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5706 ) 5707 5708 if op: 5709 this = op(self, this, field) 5710 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5711 this = self.expression( 5712 exp.Column, 5713 comments=this.comments, 5714 this=field, 5715 table=this.this, 5716 db=this.args.get("table"), 5717 catalog=this.args.get("db"), 5718 ) 5719 elif isinstance(field, exp.Window): 5720 # Move the exp.Dot's to the window's function 5721 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5722 field.set("this", window_func) 5723 this = field 5724 else: 5725 this = self.expression(exp.Dot, this=this, expression=field) 5726 5727 if field and field.comments: 5728 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5729 5730 this = self._parse_bracket(this) 5731 5732 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5733 5734 def _parse_paren(self) -> t.Optional[exp.Expression]: 5735 if not self._match(TokenType.L_PAREN): 5736 return None 5737 5738 comments = self._prev_comments 5739 
query = self._parse_select() 5740 5741 if query: 5742 expressions = [query] 5743 else: 5744 expressions = self._parse_expressions() 5745 5746 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5747 5748 if not this and self._match(TokenType.R_PAREN, advance=False): 5749 this = self.expression(exp.Tuple) 5750 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5751 this = self._parse_subquery(this=this, parse_alias=False) 5752 elif isinstance(this, exp.Subquery): 5753 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5754 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5755 this = self.expression(exp.Tuple, expressions=expressions) 5756 else: 5757 this = self.expression(exp.Paren, this=this) 5758 5759 if this: 5760 this.add_comments(comments) 5761 5762 self._match_r_paren(expression=this) 5763 return this 5764 5765 def _parse_primary(self) -> t.Optional[exp.Expression]: 5766 if self._match_set(self.PRIMARY_PARSERS): 5767 token_type = self._prev.token_type 5768 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5769 5770 if token_type == TokenType.STRING: 5771 expressions = [primary] 5772 while self._match(TokenType.STRING): 5773 expressions.append(exp.Literal.string(self._prev.text)) 5774 5775 if len(expressions) > 1: 5776 return self.expression(exp.Concat, expressions=expressions) 5777 5778 return primary 5779 5780 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5781 return exp.Literal.number(f"0.{self._prev.text}") 5782 5783 return self._parse_paren() 5784 5785 def _parse_field( 5786 self, 5787 any_token: bool = False, 5788 tokens: t.Optional[t.Collection[TokenType]] = None, 5789 anonymous_func: bool = False, 5790 ) -> t.Optional[exp.Expression]: 5791 if anonymous_func: 5792 field = ( 5793 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5794 or self._parse_primary() 5795 ) 5796 else: 5797 field = self._parse_primary() or self._parse_function( 5798 
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, also supporting the `{fn <function>}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        """Parse a comma-separated list of function arguments (each possibly a lambda)."""
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # No-paren function forms (e.g. CURRENT_DATE) are tried first when allowed
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return
self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5861 5862 return None 5863 5864 if any_token: 5865 if token_type in self.RESERVED_TOKENS: 5866 return None 5867 elif token_type not in self.FUNC_TOKENS: 5868 return None 5869 5870 self._advance(2) 5871 5872 parser = self.FUNCTION_PARSERS.get(upper) 5873 if parser and not anonymous: 5874 this = parser(self) 5875 else: 5876 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5877 5878 if subquery_predicate: 5879 expr = None 5880 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5881 expr = self._parse_select() 5882 self._match_r_paren() 5883 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5884 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5885 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5886 self._advance(-1) 5887 expr = self._parse_bitwise() 5888 5889 if expr: 5890 return self.expression(subquery_predicate, comments=comments, this=expr) 5891 5892 if functions is None: 5893 functions = self.FUNCTIONS 5894 5895 function = functions.get(upper) 5896 known_function = function and not anonymous 5897 5898 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5899 args = self._parse_function_args(alias) 5900 5901 post_func_comments = self._curr and self._curr.comments 5902 if known_function and post_func_comments: 5903 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5904 # call we'll construct it as exp.Anonymous, even if it's "known" 5905 if any( 5906 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5907 for comment in post_func_comments 5908 ): 5909 known_function = False 5910 5911 if alias and known_function: 5912 args = self._kv_to_prop_eq(args) 5913 5914 if known_function: 5915 func_builder = t.cast(t.Callable, function) 5916 5917 if "dialect" in func_builder.__code__.co_varnames: 5918 func = func_builder(args, dialect=self.dialect) 5919 else: 5920 func 
    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Normalize key/value-style arguments (aliases, EQ pairs, etc. per
        KEY_VALUE_DEFINITIONS) into exp.PropertyEQ nodes.

        Args:
            expressions: the parsed argument list, processed in order.
            parse_map: when True, keep the key expression as-is instead of
                coercing it to an identifier (used for arbitrary map keys).

        Returns:
            The transformed list, same length and order as the input.
        """
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    # `value AS key` becomes PropertyEQ(key, value)
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key is a bare identifier
                    e.this.replace(e.this.this)
            else:
                # Hook for subclasses to post-process positional arguments
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1` or `(a, b) -> a + b`); when no
        lambda arrow follows, backtrack and parse a regular expression or a
        DISTINCT list, with optional trailing modifiers.

        Args:
            alias: whether aliases are allowed when falling back to parsing
                a plain select-or-expression argument.
        """
        index = self._index  # checkpoint for backtracking

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda arg list after all; rewind
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            # Dispatch on the lambda arrow token
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
self._index 6042 if not self._match(TokenType.L_PAREN): 6043 return this 6044 6045 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 6046 # expr can be of both types 6047 if self._match_set(self.SELECT_START_TOKENS): 6048 self._retreat(index) 6049 return this 6050 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6051 self._match_r_paren() 6052 return self.expression(exp.Schema, this=this, expressions=args) 6053 6054 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6055 return self._parse_column_def(self._parse_field(any_token=True)) 6056 6057 def _parse_column_def( 6058 self, this: t.Optional[exp.Expression], computed_column: bool = True 6059 ) -> t.Optional[exp.Expression]: 6060 # column defs are not really columns, they're identifiers 6061 if isinstance(this, exp.Column): 6062 this = this.this 6063 6064 if not computed_column: 6065 self._match(TokenType.ALIAS) 6066 6067 kind = self._parse_types(schema=True) 6068 6069 if self._match_text_seq("FOR", "ORDINALITY"): 6070 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6071 6072 constraints: t.List[exp.Expression] = [] 6073 6074 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6075 ("ALIAS", "MATERIALIZED") 6076 ): 6077 persisted = self._prev.text.upper() == "MATERIALIZED" 6078 constraint_kind = exp.ComputedColumnConstraint( 6079 this=self._parse_assignment(), 6080 persisted=persisted or self._match_text_seq("PERSISTED"), 6081 data_type=exp.Var(this="AUTO") 6082 if self._match_text_seq("AUTO") 6083 else self._parse_types(), 6084 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6085 ) 6086 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6087 elif ( 6088 kind 6089 and self._match(TokenType.ALIAS, advance=False) 6090 and ( 6091 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6092 or (self._next and self._next.token_type == TokenType.L_PAREN) 6093 ) 6094 ): 6095 
self._advance() 6096 constraints.append( 6097 self.expression( 6098 exp.ColumnConstraint, 6099 kind=exp.ComputedColumnConstraint( 6100 this=self._parse_disjunction(), 6101 persisted=self._match_texts(("STORED", "VIRTUAL")) 6102 and self._prev.text.upper() == "STORED", 6103 ), 6104 ) 6105 ) 6106 6107 while True: 6108 constraint = self._parse_column_constraint() 6109 if not constraint: 6110 break 6111 constraints.append(constraint) 6112 6113 if not kind and not constraints: 6114 return this 6115 6116 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6117 6118 def _parse_auto_increment( 6119 self, 6120 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6121 start = None 6122 increment = None 6123 order = None 6124 6125 if self._match(TokenType.L_PAREN, advance=False): 6126 args = self._parse_wrapped_csv(self._parse_bitwise) 6127 start = seq_get(args, 0) 6128 increment = seq_get(args, 1) 6129 elif self._match_text_seq("START"): 6130 start = self._parse_bitwise() 6131 self._match_text_seq("INCREMENT") 6132 increment = self._parse_bitwise() 6133 if self._match_text_seq("ORDER"): 6134 order = True 6135 elif self._match_text_seq("NOORDER"): 6136 order = False 6137 6138 if start and increment: 6139 return exp.GeneratedAsIdentityColumnConstraint( 6140 start=start, increment=increment, this=False, order=order 6141 ) 6142 6143 return exp.AutoIncrementColumnConstraint() 6144 6145 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6146 if not self._match_text_seq("REFRESH"): 6147 self._retreat(self._index - 1) 6148 return None 6149 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6150 6151 def _parse_compress(self) -> exp.CompressColumnConstraint: 6152 if self._match(TokenType.L_PAREN, advance=False): 6153 return self.expression( 6154 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6155 ) 6156 6157 return 
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS {IDENTITY | ROW | (<expr>)}
        column constraints, including identity sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            # GENERATED BY DEFAULT [ON NULL] AS IDENTITY
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed column, not an identity
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare `(start [, increment])` shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
self._match_text_seq("LENGTH") 6214 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6215 6216 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6217 if self._match_text_seq("NULL"): 6218 return self.expression(exp.NotNullColumnConstraint) 6219 if self._match_text_seq("CASESPECIFIC"): 6220 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6221 if self._match_text_seq("FOR", "REPLICATION"): 6222 return self.expression(exp.NotForReplicationColumnConstraint) 6223 6224 # Unconsume the `NOT` token 6225 self._retreat(self._index - 1) 6226 return None 6227 6228 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6229 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6230 6231 procedure_option_follows = ( 6232 self._match(TokenType.WITH, advance=False) 6233 and self._next 6234 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6235 ) 6236 6237 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6238 return self.expression( 6239 exp.ColumnConstraint, 6240 this=this, 6241 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6242 ) 6243 6244 return this 6245 6246 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6247 if not self._match(TokenType.CONSTRAINT): 6248 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6249 6250 return self.expression( 6251 exp.Constraint, 6252 this=self._parse_id_var(), 6253 expressions=self._parse_unnamed_constraints(), 6254 ) 6255 6256 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6257 constraints = [] 6258 while True: 6259 constraint = self._parse_unnamed_constraint() or self._parse_function() 6260 if not constraint: 6261 break 6262 constraints.append(constraint) 6263 6264 return constraints 6265 6266 def _parse_unnamed_constraint( 6267 self, constraints: t.Optional[t.Collection[str]] = None 6268 ) -> t.Optional[exp.Expression]: 6269 if 
self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6270 constraints or self.CONSTRAINT_PARSERS 6271 ): 6272 return None 6273 6274 constraint = self._prev.text.upper() 6275 if constraint not in self.CONSTRAINT_PARSERS: 6276 self.raise_error(f"No parser found for schema constraint {constraint}.") 6277 6278 return self.CONSTRAINT_PARSERS[constraint](self) 6279 6280 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6281 return self._parse_id_var(any_token=False) 6282 6283 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6284 self._match_texts(("KEY", "INDEX")) 6285 return self.expression( 6286 exp.UniqueColumnConstraint, 6287 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6288 this=self._parse_schema(self._parse_unique_key()), 6289 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6290 on_conflict=self._parse_on_conflict(), 6291 options=self._parse_key_constraint_options(), 6292 ) 6293 6294 def _parse_key_constraint_options(self) -> t.List[str]: 6295 options = [] 6296 while True: 6297 if not self._curr: 6298 break 6299 6300 if self._match(TokenType.ON): 6301 action = None 6302 on = self._advance_any() and self._prev.text 6303 6304 if self._match_text_seq("NO", "ACTION"): 6305 action = "NO ACTION" 6306 elif self._match_text_seq("CASCADE"): 6307 action = "CASCADE" 6308 elif self._match_text_seq("RESTRICT"): 6309 action = "RESTRICT" 6310 elif self._match_pair(TokenType.SET, TokenType.NULL): 6311 action = "SET NULL" 6312 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6313 action = "SET DEFAULT" 6314 else: 6315 self.raise_error("Invalid key constraint") 6316 6317 options.append(f"ON {on} {action}") 6318 else: 6319 var = self._parse_var_from_options( 6320 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6321 ) 6322 if not var: 6323 break 6324 options.append(var.name) 6325 6326 return options 6327 6328 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 
6329 if match and not self._match(TokenType.REFERENCES): 6330 return None 6331 6332 expressions = None 6333 this = self._parse_table(schema=True) 6334 options = self._parse_key_constraint_options() 6335 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6336 6337 def _parse_foreign_key(self) -> exp.ForeignKey: 6338 expressions = ( 6339 self._parse_wrapped_id_vars() 6340 if not self._match(TokenType.REFERENCES, advance=False) 6341 else None 6342 ) 6343 reference = self._parse_references() 6344 on_options = {} 6345 6346 while self._match(TokenType.ON): 6347 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6348 self.raise_error("Expected DELETE or UPDATE") 6349 6350 kind = self._prev.text.lower() 6351 6352 if self._match_text_seq("NO", "ACTION"): 6353 action = "NO ACTION" 6354 elif self._match(TokenType.SET): 6355 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6356 action = "SET " + self._prev.text.upper() 6357 else: 6358 self._advance() 6359 action = self._prev.text.upper() 6360 6361 on_options[kind] = action 6362 6363 return self.expression( 6364 exp.ForeignKey, 6365 expressions=expressions, 6366 reference=reference, 6367 options=self._parse_key_constraint_options(), 6368 **on_options, # type: ignore 6369 ) 6370 6371 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6372 return self._parse_ordered() or self._parse_field() 6373 6374 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6375 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6376 self._retreat(self._index - 1) 6377 return None 6378 6379 id_vars = self._parse_wrapped_id_vars() 6380 return self.expression( 6381 exp.PeriodForSystemTimeConstraint, 6382 this=seq_get(id_vars, 0), 6383 expression=seq_get(id_vars, 1), 6384 ) 6385 6386 def _parse_primary_key( 6387 self, wrapped_optional: bool = False, in_props: bool = False 6388 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6389 desc = ( 6390 
self._match_set((TokenType.ASC, TokenType.DESC)) 6391 and self._prev.token_type == TokenType.DESC 6392 ) 6393 6394 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6395 return self.expression( 6396 exp.PrimaryKeyColumnConstraint, 6397 desc=desc, 6398 options=self._parse_key_constraint_options(), 6399 ) 6400 6401 expressions = self._parse_wrapped_csv( 6402 self._parse_primary_key_part, optional=wrapped_optional 6403 ) 6404 6405 return self.expression( 6406 exp.PrimaryKey, 6407 expressions=expressions, 6408 include=self._parse_index_params(), 6409 options=self._parse_key_constraint_options(), 6410 ) 6411 6412 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6413 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6414 6415 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6416 """ 6417 Parses a datetime column in ODBC format. We parse the column into the corresponding 6418 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6419 same as we did for `DATE('yyyy-mm-dd')`. 
6420 6421 Reference: 6422 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6423 """ 6424 self._match(TokenType.VAR) 6425 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6426 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6427 if not self._match(TokenType.R_BRACE): 6428 self.raise_error("Expected }") 6429 return expression 6430 6431 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6432 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6433 return this 6434 6435 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6436 map_token = seq_get(self._tokens, self._index - 2) 6437 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6438 else: 6439 parse_map = False 6440 6441 bracket_kind = self._prev.token_type 6442 if ( 6443 bracket_kind == TokenType.L_BRACE 6444 and self._curr 6445 and self._curr.token_type == TokenType.VAR 6446 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6447 ): 6448 return self._parse_odbc_datetime_literal() 6449 6450 expressions = self._parse_csv( 6451 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6452 ) 6453 6454 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6455 self.raise_error("Expected ]") 6456 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6457 self.raise_error("Expected }") 6458 6459 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6460 if bracket_kind == TokenType.L_BRACE: 6461 this = self.expression( 6462 exp.Struct, 6463 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6464 ) 6465 elif not this: 6466 this = build_array_constructor( 6467 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6468 ) 6469 else: 6470 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6471 if 
constructor_type: 6472 return build_array_constructor( 6473 constructor_type, 6474 args=expressions, 6475 bracket_kind=bracket_kind, 6476 dialect=self.dialect, 6477 ) 6478 6479 expressions = apply_index_offset( 6480 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6481 ) 6482 this = self.expression( 6483 exp.Bracket, 6484 this=this, 6485 expressions=expressions, 6486 comments=this.pop_comments(), 6487 ) 6488 6489 self._add_comments(this) 6490 return self._parse_bracket(this) 6491 6492 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6493 if self._match(TokenType.COLON): 6494 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6495 return this 6496 6497 def _parse_case(self) -> t.Optional[exp.Expression]: 6498 if self._match(TokenType.DOT, advance=False): 6499 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6500 self._retreat(self._index - 1) 6501 return None 6502 6503 ifs = [] 6504 default = None 6505 6506 comments = self._prev_comments 6507 expression = self._parse_assignment() 6508 6509 while self._match(TokenType.WHEN): 6510 this = self._parse_assignment() 6511 self._match(TokenType.THEN) 6512 then = self._parse_assignment() 6513 ifs.append(self.expression(exp.If, this=this, true=then)) 6514 6515 if self._match(TokenType.ELSE): 6516 default = self._parse_assignment() 6517 6518 if not self._match(TokenType.END): 6519 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6520 default = exp.column("interval") 6521 else: 6522 self.raise_error("Expected END after CASE", self._prev) 6523 6524 return self.expression( 6525 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6526 ) 6527 6528 def _parse_if(self) -> t.Optional[exp.Expression]: 6529 if self._match(TokenType.L_PAREN): 6530 args = self._parse_csv( 6531 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6532 ) 6533 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6534 self._match_r_paren() 6535 else: 6536 index = self._index - 1 6537 6538 if self.NO_PAREN_IF_COMMANDS and index == 0: 6539 return self._parse_as_command(self._prev) 6540 6541 condition = self._parse_assignment() 6542 6543 if not condition: 6544 self._retreat(index) 6545 return None 6546 6547 self._match(TokenType.THEN) 6548 true = self._parse_assignment() 6549 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6550 self._match(TokenType.END) 6551 this = self.expression(exp.If, this=condition, true=true, false=false) 6552 6553 return this 6554 6555 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6556 if not self._match_text_seq("VALUE", "FOR"): 6557 self._retreat(self._index - 1) 6558 return None 6559 6560 return self.expression( 6561 exp.NextValueFor, 6562 this=self._parse_column(), 6563 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6564 ) 6565 6566 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6567 this = self._parse_function() or self._parse_var_or_string(upper=True) 6568 6569 if self._match(TokenType.FROM): 6570 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6571 6572 if not self._match(TokenType.COMMA): 6573 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6574 6575 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6576 6577 def _parse_gap_fill(self) -> exp.GapFill: 6578 self._match(TokenType.TABLE) 6579 this = self._parse_table() 6580 6581 self._match(TokenType.COMMA) 6582 args = [this, *self._parse_csv(self._parse_lambda)] 6583 6584 gap_fill = exp.GapFill.from_arg_list(args) 6585 return self.validate_expression(gap_fill, args) 6586 6587 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6588 this = self._parse_assignment() 6589 6590 if not self._match(TokenType.ALIAS): 6591 if 
    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG-style arguments into a canonical
        exp.GroupConcat, handling DISTINCT, Trino's ON OVERFLOW behavior,
        trailing ORDER BY / LIMIT (Postgres, BigQuery) and WITHIN GROUP."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )
self._parse_alias(self._parse_string()) 6751 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6752 if not self._match(TokenType.COMMA): 6753 break 6754 6755 return namespaces 6756 6757 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6758 args = self._parse_csv(self._parse_assignment) 6759 6760 if len(args) < 3: 6761 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6762 6763 return self.expression(exp.DecodeCase, expressions=args) 6764 6765 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6766 self._match_text_seq("KEY") 6767 key = self._parse_column() 6768 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6769 self._match_text_seq("VALUE") 6770 value = self._parse_bitwise() 6771 6772 if not key and not value: 6773 return None 6774 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6775 6776 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6777 if not this or not self._match_text_seq("FORMAT", "JSON"): 6778 return this 6779 6780 return self.expression(exp.FormatJson, this=this) 6781 6782 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6783 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6784 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6785 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6786 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6787 else: 6788 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6789 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6790 6791 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6792 6793 if not empty and not error and not null: 6794 return None 6795 6796 return self.expression( 6797 exp.OnCondition, 6798 empty=empty, 6799 error=error, 6800 null=null, 6801 ) 6802 6803 def _parse_on_handling( 6804 self, on: str, *values: str 6805 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6806 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6807 for value in values: 6808 if self._match_text_seq(value, "ON", on): 6809 return f"{value} ON {on}" 6810 6811 index = self._index 6812 if self._match(TokenType.DEFAULT): 6813 default_value = self._parse_bitwise() 6814 if self._match_text_seq("ON", on): 6815 return default_value 6816 6817 self._retreat(index) 6818 6819 return None 6820 6821 @t.overload 6822 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6823 6824 @t.overload 6825 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments (after the opening paren).

        Handles `*`, KEY/VALUE pairs, NULL|ABSENT ON NULL, WITH[OUT] UNIQUE [KEYS],
        RETURNING <type> and ENCODING <var> clauses.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Optional trailing KEYS keyword of WITH/WITHOUT UNIQUE [KEYS]
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a `COLUMNS (...)` schema clause of JSON_TABLE / NESTED PATH."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<json>, <path> ... ON ERROR/EMPTY ... COLUMNS(...)) arguments."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse the tail of MATCH(...) AGAINST(...) with its optional search modifiers."""
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<json>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `<name> <type> [<path>] [AS JSON]` entry of the WITH clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments, supporting both `x IN y` and csv forms.

        Args:
            haystack_first: when True, the first csv argument is the haystack
                (e.g. INSTR) rather than the needle.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # Standard SQL form: POSITION(<substr> IN <string>)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list and store the hint name uppercased."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        """Parse TRIM([LEADING|TRAILING|BOTH] [<chars> FROM] <expr> [COLLATE ...])."""

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects) the first operand is the
            # trim character set and the second is the target string — swap them.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `WINDOW <name> AS (...), ...` clause; None when WINDOW is absent."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition (`<name> AS (<spec>)`)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX <col>` / `HAVING MIN <col>` qualifier (e.g. Snowflake)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # NOTE: local `max` shadows the builtin; True unless the keyword was MIN
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`.

        Handles WITHIN GROUP, FILTER (WHERE ...), IGNORE/RESPECT NULLS, and the
        OVER (...) specification (partition/order/frame/EXCLUDE), as well as
        named windows when `alias` is True. Returns `this` unchanged when no
        window syntax follows.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist IGNORE/RESPECT NULLS from inside the aggregate to wrap it
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # Comments were transferred to the Window node below; drop them from the func
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> — a reference to a named window instead of an inline spec
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window frame bound, e.g. `UNBOUNDED PRECEDING` or `5 FOLLOWING`."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_type()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with `explicit`, require the AS keyword."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier expression.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: explicit token types to accept instead of ID_VAR_TOKENS.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # A string token used as an identifier keeps its quoted status
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted Identifier."""
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token (or any/explicit tokens), optionally uppercased."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()
bool = False) -> t.Optional[Token]: 7267 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7268 self._advance() 7269 return self._prev 7270 return None 7271 7272 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7273 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7274 7275 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7276 return self._parse_primary() or self._parse_var(any_token=True) 7277 7278 def _parse_null(self) -> t.Optional[exp.Expression]: 7279 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7280 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7281 return self._parse_placeholder() 7282 7283 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7284 if self._match(TokenType.TRUE): 7285 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7286 if self._match(TokenType.FALSE): 7287 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7288 return self._parse_placeholder() 7289 7290 def _parse_star(self) -> t.Optional[exp.Expression]: 7291 if self._match(TokenType.STAR): 7292 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7293 return self._parse_placeholder() 7294 7295 def _parse_parameter(self) -> exp.Parameter: 7296 this = self._parse_identifier() or self._parse_primary_or_var() 7297 return self.expression(exp.Parameter, this=this) 7298 7299 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7300 if self._match_set(self.PLACEHOLDER_PARSERS): 7301 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7302 if placeholder: 7303 return placeholder 7304 self._advance(-1) 7305 return None 7306 7307 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7308 if not self._match_texts(keywords): 7309 return None 7310 if self._match(TokenType.L_PAREN, advance=False): 7311 return self._parse_wrapped_csv(self._parse_expression) 7312 7313 expression = 
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that appeared before the separator to the prior item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators: map matched token types to expression classes."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a csv list wrapped in parentheses (parens optional when `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they're missing and required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a scalar expression (optionally aliased) or a SELECT statement."""
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CTAS), incl. set operations."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<modes>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR/NOT tokens, e.g. "READ ONLY" / "NOT DEFERRABLE"
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT ...] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was already consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
    def _parse_column_def_with_exists(self):
        """Parse `[COLUMN] [IF NOT EXISTS] <column def>`; backtrack and return None on failure."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            # Not a column definition after all — restore the token position
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse ALTER TABLE ... ADD [COLUMN] with optional FIRST/AFTER positioning."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition spec list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns, partitions)."""

        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One ADD item: constraint, column def, or partition
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # Column-list form: ADD [COLUMNS] (<defs>) or a bare csv of column defs
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] actions (drop/set default, type, etc.)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift ALTER DISTSTYLE {ALL|EVEN|AUTO | KEY DISTKEY <col>}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift ALTER [COMPOUND] SORTKEY {(<cols>) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: either partition drops or column drops."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind (incl. the DROP keyword) and parse columns
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN] <old> TO <new> or RENAME TO <table>."""
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse ALTER TABLE ... SET <option>, covering the many dialect-specific forms
        (TBLPROPERTIES, LOGGED/UNLOGGED, LOCATION, TABLESPACE, Snowflake stage options,
        tags, Hive SERDE properties, etc.)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: optional SERDE name followed by wrapped properties
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, dispatching on the alterable kind and action.

        Falls back to a generic Command when the target or action is unrecognized
        or tokens remain after parsing the actions.
        """
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            # ALTER SESSION has no table target
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only accept the parse when all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse an ANALYZE statement across dialects (DuckDB, Presto, StarRocks, ...).

        Falls back to a generic Command for unsupported partition syntax.
        """
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        # Peek at the next keyword before consuming it so it can become `kind`
        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # self._index - 2 points back at the SYNC/ASYNC token just matched
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )
mode=mode, 7749 partition=partition, 7750 properties=properties, 7751 expression=inner_expression, 7752 options=options, 7753 ) 7754 7755 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7756 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7757 this = None 7758 kind = self._prev.text.upper() 7759 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7760 expressions = [] 7761 7762 if not self._match_text_seq("STATISTICS"): 7763 self.raise_error("Expecting token STATISTICS") 7764 7765 if self._match_text_seq("NOSCAN"): 7766 this = "NOSCAN" 7767 elif self._match(TokenType.FOR): 7768 if self._match_text_seq("ALL", "COLUMNS"): 7769 this = "FOR ALL COLUMNS" 7770 if self._match_texts("COLUMNS"): 7771 this = "FOR COLUMNS" 7772 expressions = self._parse_csv(self._parse_column_reference) 7773 elif self._match_text_seq("SAMPLE"): 7774 sample = self._parse_number() 7775 expressions = [ 7776 self.expression( 7777 exp.AnalyzeSample, 7778 sample=sample, 7779 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7780 ) 7781 ] 7782 7783 return self.expression( 7784 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7785 ) 7786 7787 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7788 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7789 kind = None 7790 this = None 7791 expression: t.Optional[exp.Expression] = None 7792 if self._match_text_seq("REF", "UPDATE"): 7793 kind = "REF" 7794 this = "UPDATE" 7795 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7796 this = "UPDATE SET DANGLING TO NULL" 7797 elif self._match_text_seq("STRUCTURE"): 7798 kind = "STRUCTURE" 7799 if self._match_text_seq("CASCADE", "FAST"): 7800 this = "CASCADE FAST" 7801 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7802 ("ONLINE", "OFFLINE") 7803 ): 7804 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7805 
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse e.g. UPDATE/DROP COLUMNS; returns None if COLUMNS does not follow."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse DELETE [SYSTEM] STATISTICS; returns None if STATISTICS does not follow."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse LIST CHAINED ROWS [INTO ...]; returns None on no match."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse UPDATE/DROP HISTOGRAM ON cols [WITH ...] (MySQL/StarRocks)."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # MySQL: WITH n BUCKETS
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            # MANUAL/AUTO must be immediately followed by UPDATE to count as an option.
            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            # MySQL: UPDATE HISTOGRAM ... USING DATA 'json'
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... statements."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False otherwise (BY TARGET or absent).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    # INSERT ROW | INSERT (cols) VALUES (...)
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # e.g. DO NOTHING and other conflict actions.
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to exp.Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; None on no match."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the RHS are values, not column references.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item, dispatching through SET_PARSERS when possible."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; unconsumed tokens trigger the exp.Command fallback."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option from `options` into an exp.Var.

        Returns None (or raises, if raise_unmatched) when the tokens don't form
        a known option.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched. `continuations is None` means the first
            # keyword itself was unknown.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property, e.g. ClickHouse's SOURCE(... key value ...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key,
                                                value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse RANGE(MIN x MAX y) / RANGE(MAX y); MIN defaults to 0 when omitted."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr IN iterator [IF condition]`; None (with rewind) if no IN."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind one extra token so the caller re-sees the token before `expression`.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted heredoc, e.g. $$body$$ or $tag$body$tag$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening tag tokens must be adjacent (no whitespace).
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Tagged form: expect the closing $ of the opening $tag$.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags,
                                    advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the longest keyword sequence present in `trie`.

        Leaves the matched tokens consumed on success; rewinds and returns None
        on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (consuming the token if `advance`) when the current token
        has `token_type`; attach pending comments to `expression` on a match."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but accepts any token type in the collection `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types, consuming both when `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match when the current (non-string) token's upper-cased text is in `texts`."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match the exact sequence of keyword texts; rewinds entirely on failure
        (and also when advance=False, i.e. pure lookahead)."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` that refer to lambda parameters, optionally
        casting them when the parameter carries a type (falsy value = no cast)."""
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain that contains this column;
                # the while/else handles the non-Dot parent case.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [DATABASE|TABLE] ...; disambiguates from TRUNCATE(x, y)."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse `expr WITH op` (e.g. index/exclusion operator clauses)."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized option list, e.g. Snowflake's FILE_FORMAT = (...)."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts
    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the key/value parameter list of a COPY statement."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (Snowflake/Redshift style)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Dialects may override this to parse stage/URL locations differently.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY INTO ... FROM/TO ..., falling back to exp.Command."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind=True means COPY FROM (loading), False means COPY TO (unloading).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        if self._match(TokenType.EQ, advance=False):
            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
            # list via `_parse_wrapped(..)` below.
            self._advance(-1)
            files = []

        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(str [, form])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR(x [, decimals]) [TO unit]."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` together with EXCEPT/EXCLUDE/REPLACE/RENAME modifiers,
        or a COLUMNS(...) unpack."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege (possibly multi-word) with optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        """Parse the privileges / ON kind / securable prefix shared by GRANT and REVOKE."""
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g.
        # MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT ... ON ... TO ..., falling back to exp.Command."""
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        # Any leftover tokens mean an unsupported tail — keep the raw command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        """Parse REVOKE [GRANT OPTION FOR] ... FROM ..., falling back to exp.Command."""
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        # Any leftover tokens mean an unsupported tail — keep the raw command.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(str PLACING str FROM n [FOR n])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and
                self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        """Parse MAX_BY/MIN_BY([DISTINCT] expr, expr [, count])."""
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        """Build an exp.Identifier from `token` (default: previous token),
        carrying over its source positions."""
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        """Wrap `query` as a CTE and return a SELECT of `expressions` from it.

        Used by the pipe syntax (`|>`) transforms; generates __tmpN names
        when no explicit alias is supplied.
        """
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        # Hoist existing CTEs onto the new outer SELECT.
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        """Handle `|> SELECT ...`: replace the projections and wrap in a CTE."""
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handle `|> LIMIT ... [OFFSET ...]`: keep the smaller limit, sum offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parse one AGGREGATE field: expression [alias] [ASC|DESC]."""
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            # Stop before the GROUP AND ORDER BY suffix.
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Apply AGGREGATE fields to `query` as projections, GROUP BY and ORDER BY."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias rather than the full expression.
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handle `|> AGGREGATE ... [GROUP [AND ORDER] BY ...]`."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> UNION/EXCEPT/INTERSECT (...) [, (...)]`; None if not a set op."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        # Detach `query` from the parsed set op; it is re-wrapped as a CTE below.
        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> JOIN ...`; None when no join follows."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handle `|> PIVOT/UNPIVOT ...`, attaching pivots to the FROM source."""
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        """Handle `|> EXTEND ...`: append new projections after `*`."""
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        """Handle `|> TABLESAMPLE ...`, attaching the sample to the last CTE
        when one exists, otherwise to the query itself."""
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Apply consecutive `|>` pipe operators to `query`."""
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
8778 parsed_query = self._parse_pipe_syntax_set_operator(query) 8779 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8780 if not parsed_query: 8781 self._retreat(start) 8782 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8783 break 8784 query = parsed_query 8785 else: 8786 query = parser(self, query) 8787 8788 return query 8789 8790 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8791 vars = self._parse_csv(self._parse_id_var) 8792 if not vars: 8793 return None 8794 8795 return self.expression( 8796 exp.DeclareItem, 8797 this=vars, 8798 kind=self._parse_types(), 8799 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8800 ) 8801 8802 def _parse_declare(self) -> exp.Declare | exp.Command: 8803 start = self._prev 8804 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8805 8806 if not expressions or self._curr: 8807 return self._parse_as_command(start) 8808 8809 return self.expression(exp.Declare, expressions=expressions) 8810 8811 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8812 exp_class = exp.Cast if strict else exp.TryCast 8813 8814 if exp_class == exp.TryCast: 8815 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8816 8817 return self.expression(exp_class, **kwargs) 8818 8819 def _parse_json_value(self) -> exp.JSONValue: 8820 this = self._parse_bitwise() 8821 self._match(TokenType.COMMA) 8822 path = self._parse_bitwise() 8823 8824 returning = self._match(TokenType.RETURNING) and self._parse_type() 8825 8826 return self.expression( 8827 exp.JSONValue, 8828 this=this, 8829 path=self.dialect.to_json_path(path), 8830 returning=returning, 8831 on_condition=self._parse_on_condition(), 8832 ) 8833 8834 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8835 def concat_exprs( 8836 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8837 ) -> exp.Expression: 8838 if isinstance(node, exp.Distinct) and 
len(node.expressions) > 1: 8839 concat_exprs = [ 8840 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8841 ] 8842 node.set("expressions", concat_exprs) 8843 return node 8844 if len(exprs) == 1: 8845 return exprs[0] 8846 return self.expression(exp.Concat, expressions=args, safe=True) 8847 8848 args = self._parse_csv(self._parse_lambda) 8849 8850 if args: 8851 order = args[-1] if isinstance(args[-1], exp.Order) else None 8852 8853 if order: 8854 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8855 # remove 'expr' from exp.Order and add it back to args 8856 args[-1] = order.this 8857 order.set("this", concat_exprs(order.this, args)) 8858 8859 this = order or concat_exprs(args[0], args) 8860 else: 8861 this = None 8862 8863 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8864 8865 return self.expression(exp.GroupConcat, this=this, separator=separator)
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1584 def __init__( 1585 self, 1586 error_level: t.Optional[ErrorLevel] = None, 1587 error_message_context: int = 100, 1588 max_errors: int = 3, 1589 dialect: DialectType = None, 1590 ): 1591 from sqlglot.dialects import Dialect 1592 1593 self.error_level = error_level or ErrorLevel.IMMEDIATE 1594 self.error_message_context = error_message_context 1595 self.max_errors = max_errors 1596 self.dialect = Dialect.get_or_raise(dialect) 1597 self.reset()
1610 def parse( 1611 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1612 ) -> t.List[t.Optional[exp.Expression]]: 1613 """ 1614 Parses a list of tokens and returns a list of syntax trees, one tree 1615 per parsed SQL statement. 1616 1617 Args: 1618 raw_tokens: The list of tokens. 1619 sql: The original SQL string, used to produce helpful debug messages. 1620 1621 Returns: 1622 The list of the produced syntax trees. 1623 """ 1624 return self._parse( 1625 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1626 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1628 def parse_into( 1629 self, 1630 expression_types: exp.IntoType, 1631 raw_tokens: t.List[Token], 1632 sql: t.Optional[str] = None, 1633 ) -> t.List[t.Optional[exp.Expression]]: 1634 """ 1635 Parses a list of tokens into a given Expression type. If a collection of Expression 1636 types is given instead, this method will try to parse the token list into each one 1637 of them, stopping at the first for which the parsing succeeds. 1638 1639 Args: 1640 expression_types: The expression type(s) to try and parse the token list into. 1641 raw_tokens: The list of tokens. 1642 sql: The original SQL string, used to produce helpful debug messages. 1643 1644 Returns: 1645 The target Expression. 1646 """ 1647 errors = [] 1648 for expression_type in ensure_list(expression_types): 1649 parser = self.EXPRESSION_PARSERS.get(expression_type) 1650 if not parser: 1651 raise TypeError(f"No parser registered for {expression_type}") 1652 1653 try: 1654 return self._parse(parser, raw_tokens, sql) 1655 except ParseError as e: 1656 e.errors[0]["into_expression"] = expression_type 1657 errors.append(e) 1658 1659 raise ParseError( 1660 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1661 errors=merge_errors(errors), 1662 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1702 def check_errors(self) -> None: 1703 """Logs or raises any found errors, depending on the chosen error level setting.""" 1704 if self.error_level == ErrorLevel.WARN: 1705 for error in self.errors: 1706 logger.error(str(error)) 1707 elif self.error_level == ErrorLevel.RAISE and self.errors: 1708 raise ParseError( 1709 concat_messages(self.errors, self.max_errors), 1710 errors=merge_errors(self.errors), 1711 )
Logs or raises any found errors, depending on the chosen error level setting.
1713 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1714 """ 1715 Appends an error in the list of recorded errors or raises it, depending on the chosen 1716 error level setting. 1717 """ 1718 token = token or self._curr or self._prev or Token.string("") 1719 start = token.start 1720 end = token.end + 1 1721 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1722 highlight = self.sql[start:end] 1723 end_context = self.sql[end : end + self.error_message_context] 1724 1725 error = ParseError.new( 1726 f"{message}. Line {token.line}, Col: {token.col}.\n" 1727 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1728 description=message, 1729 line=token.line, 1730 col=token.col, 1731 start_context=start_context, 1732 highlight=highlight, 1733 end_context=end_context, 1734 ) 1735 1736 if self.error_level == ErrorLevel.IMMEDIATE: 1737 raise error 1738 1739 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1741 def expression( 1742 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1743 ) -> E: 1744 """ 1745 Creates a new, validated Expression. 1746 1747 Args: 1748 exp_class: The expression class to instantiate. 1749 comments: An optional list of comments to attach to the expression. 1750 kwargs: The arguments to set for the expression along with their respective values. 1751 1752 Returns: 1753 The target expression. 1754 """ 1755 instance = exp_class(**kwargs) 1756 instance.add_comments(comments) if comments else self._add_comments(instance) 1757 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1764 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1765 """ 1766 Validates an Expression, making sure that all its mandatory arguments are set. 1767 1768 Args: 1769 expression: The expression to validate. 1770 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1771 1772 Returns: 1773 The validated expression. 1774 """ 1775 if self.error_level != ErrorLevel.IGNORE: 1776 for error_message in expression.error_messages(args): 1777 self.raise_error(error_message) 1778 1779 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parses a set operation (UNION / EXCEPT / INTERSECT) with `this` as the
        left-hand side. Returns None (after restoring the token position) if no
        set-operation keyword follows.
        """
        start = self._index
        # Consume optional join-style modifiers; the returned tokens carry the
        # side (e.g. LEFT/RIGHT) and kind text when present.
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            # Not a set operation: rewind past anything consumed above.
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        # DISTINCT / ALL modifier; when absent, fall back to the dialect's
        # per-operation default. A None default means the dialect requires an
        # explicit modifier.
        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        # BY NAME / STRICT CORRESPONDING / CORRESPONDING all enable by-name
        # column matching.
        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            # Explicit column list restricting the by-name matching.
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        # Right-hand side of the set operation; nested set operations are not
        # re-parsed here (parse_set_operation=False) to keep associativity flat.
        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )